From 3bdff33bd4cbdc501a4bdb8fdf220676b8042bb6 Mon Sep 17 00:00:00 2001 From: "kaf24@scramble.cl.cam.ac.uk[kaf24]" Date: Wed, 10 Mar 2004 13:46:20 +0000 Subject: [PATCH] bitkeeper revision 1.773 (404f1c2crOagpEppZ6uF3JohTX-G5g) xen.patchF --- xen/arch/i386/Rules.mk | 5 +- xen/arch/i386/entry.S | 2 +- xen/arch/i386/process.c | 1 + xen/arch/i386/time.c | 2 +- xen/arch/x86_64/Rules.mk | 5 +- xen/common/Makefile | 2 +- xen/common/memory.c | 1 + xen/include/asm-i386/config.h | 6 +- xen/include/asm-x86_64/apic.h | 16 +- xen/include/asm-x86_64/apicdef.h | 17 +- xen/include/asm-x86_64/atomic.h | 10 + xen/include/asm-x86_64/bitops.h | 155 +++-- xen/include/asm-x86_64/byteorder.h | 43 +- xen/include/asm-x86_64/cache.h | 6 +- xen/include/asm-x86_64/config.h | 29 +- xen/include/asm-x86_64/cpufeature.h | 9 +- xen/include/asm-x86_64/current.h | 39 +- xen/include/asm-x86_64/debugreg.h | 7 +- xen/include/asm-x86_64/delay.h | 6 +- xen/include/asm-x86_64/desc.h | 86 ++- xen/include/asm-x86_64/domain_page.h | 8 +- xen/include/asm-x86_64/fixmap.h | 2 +- xen/include/asm-x86_64/hdreg.h | 11 +- xen/include/asm-x86_64/ide.h | 13 +- xen/include/asm-x86_64/io.h | 263 ++++----- xen/include/asm-x86_64/io_apic.h | 2 +- xen/include/asm-x86_64/ioctl.h | 8 +- xen/include/asm-x86_64/irq.h | 86 +-- xen/include/asm-x86_64/mpspec.h | 33 +- xen/include/asm-x86_64/msr.h | 165 ++++-- xen/include/asm-x86_64/page.h | 165 +++++- xen/include/asm-x86_64/param.h | 4 +- xen/include/asm-x86_64/pci.h | 182 +++--- xen/include/asm-x86_64/pgalloc.h | 44 +- xen/include/asm-x86_64/processor.h | 375 ++++++------ xen/include/asm-x86_64/ptrace.h | 107 +++- xen/include/asm-x86_64/rwlock.h | 19 +- xen/include/asm-x86_64/scatterlist.h | 4 +- xen/include/asm-x86_64/smp.h | 44 +- xen/include/asm-x86_64/spinlock.h | 59 +- xen/include/asm-x86_64/string.h | 492 +-------------- xen/include/asm-x86_64/system.h | 82 +-- xen/include/asm-x86_64/timex.h | 71 ++- xen/include/asm-x86_64/types.h | 16 +- xen/include/asm-x86_64/uaccess.h | 
558 +++++------------- xen/include/asm-x86_64/unaligned.h | 6 +- xen/include/hypervisor-ifs/dom0_ops.h | 23 - xen/include/hypervisor-ifs/hypervisor-if.h | 82 +-- .../if-i386/hypervisor-if-arch.h | 290 +-------- .../if-x86_64/hypervisor-if-arch.h | 326 ++-------- xen/include/xeno/sched.h | 23 +- 51 files changed, 1464 insertions(+), 2546 deletions(-) diff --git a/xen/arch/i386/Rules.mk b/xen/arch/i386/Rules.mk index 56f5932b6d..2b7225d7fa 100644 --- a/xen/arch/i386/Rules.mk +++ b/xen/arch/i386/Rules.mk @@ -11,7 +11,8 @@ CFLAGS := -nostdinc -fno-builtin -fno-common -fno-strict-aliasing CFLAGS += -iwithprefix include -O3 -Wall -DMONITOR_BASE=$(MONITOR_BASE) CFLAGS += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__ -DNDEBUG #CFLAGS += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__ -CFLAGS += -Wno-pointer-arith -Wredundant-decls -LDFLAGS := -T xeno.lds -N +CFLAGS += -Wno-pointer-arith -Wredundant-decls -m32 +LDARCHFLAGS := --oformat elf32-i386 +LDFLAGS := -T xeno.lds -N diff --git a/xen/arch/i386/entry.S b/xen/arch/i386/entry.S index bc0fc732a8..1ac45dd3de 100644 --- a/xen/arch/i386/entry.S +++ b/xen/arch/i386/entry.S @@ -202,7 +202,7 @@ multicall_fault6: multicall_return_from_call: addl $20,%esp popl %ecx - addl $BYTES_PER_MULTICALL_ENTRY,%ebx + addl $(ARGS_PER_MULTICALL_ENTRY*4),%ebx loop multicall_loop popl %ebx xorl %eax,%eax diff --git a/xen/arch/i386/process.c b/xen/arch/i386/process.c index 7df4f6d37c..09170307a7 100644 --- a/xen/arch/i386/process.c +++ b/xen/arch/i386/process.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff --git a/xen/arch/i386/time.c b/xen/arch/i386/time.c index 8d328c34b6..947408882d 100644 --- a/xen/arch/i386/time.c +++ b/xen/arch/i386/time.c @@ -282,7 +282,7 @@ void update_dom_time(shared_info_t *si) /* NB. These two values don't actually ever change. 
*/ si->cpu_freq = cpu_freq; - si->rdtsc_bitshift = rdtsc_bitshift; + si->arch.rdtsc_bitshift = rdtsc_bitshift; si->system_time = stime_irq; si->tsc_timestamp = tsc_irq; diff --git a/xen/arch/x86_64/Rules.mk b/xen/arch/x86_64/Rules.mk index 56f5932b6d..dcb1fb14b6 100644 --- a/xen/arch/x86_64/Rules.mk +++ b/xen/arch/x86_64/Rules.mk @@ -11,7 +11,8 @@ CFLAGS := -nostdinc -fno-builtin -fno-common -fno-strict-aliasing CFLAGS += -iwithprefix include -O3 -Wall -DMONITOR_BASE=$(MONITOR_BASE) CFLAGS += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__ -DNDEBUG #CFLAGS += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__ -CFLAGS += -Wno-pointer-arith -Wredundant-decls -LDFLAGS := -T xeno.lds -N +CFLAGS += -Wno-pointer-arith -Wredundant-decls -m64 +LDARCHFLAGS := +LDFLAGS := -T xeno.lds -N diff --git a/xen/common/Makefile b/xen/common/Makefile index 12f1f7d2e9..aa35bdb405 100644 --- a/xen/common/Makefile +++ b/xen/common/Makefile @@ -2,7 +2,7 @@ include $(BASEDIR)/Rules.mk default: $(OBJS) - $(LD) -r -o common.o $(OBJS) + $(LD) $(LDARCHFLAGS) -r -o common.o $(OBJS) clean: rm -f *.o *~ core diff --git a/xen/common/memory.c b/xen/common/memory.c index a921548ed4..32acc0ac11 100644 --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -138,6 +138,7 @@ #include #include #include +#include #ifndef NDEBUG #define MEM_LOG(_f, _a...) \ diff --git a/xen/include/asm-i386/config.h b/xen/include/asm-i386/config.h index bfd99a0725..436459cb45 100644 --- a/xen/include/asm-i386/config.h +++ b/xen/include/asm-i386/config.h @@ -4,8 +4,8 @@ * A Linux-style configuration list. 
*/ -#ifndef __XENO_CONFIG_H__ -#define __XENO_CONFIG_H__ +#ifndef __XENO_I386_CONFIG_H__ +#define __XENO_I386_CONFIG_H__ #define CONFIG_X86 1 @@ -167,4 +167,4 @@ extern unsigned int opt_ser_baud; #endif /* __ASSEMBLY__ */ -#endif /* __XENO_CONFIG_H__ */ +#endif /* __XENO_I386_CONFIG_H__ */ diff --git a/xen/include/asm-x86_64/apic.h b/xen/include/asm-x86_64/apic.h index 705942596c..0bf81715ba 100644 --- a/xen/include/asm-x86_64/apic.h +++ b/xen/include/asm-x86_64/apic.h @@ -20,19 +20,20 @@ * Basic functions accessing APICs. */ -static __inline void apic_write(unsigned long reg, unsigned long v) +static __inline void apic_write(unsigned long reg, unsigned int v) { - *((volatile unsigned long *)(APIC_BASE+reg)) = v; + *((volatile unsigned int *)(APIC_BASE+reg)) = v; + barrier(); } -static __inline void apic_write_atomic(unsigned long reg, unsigned long v) +static __inline void apic_write_atomic(unsigned long reg, unsigned int v) { - xchg((volatile unsigned long *)(APIC_BASE+reg), v); + xchg((volatile unsigned int *)(APIC_BASE+reg), v); } -static __inline unsigned long apic_read(unsigned long reg) +static __inline unsigned int apic_read(unsigned long reg) { - return *((volatile unsigned long *)(APIC_BASE+reg)); + return *((volatile unsigned int *)(APIC_BASE+reg)); } static __inline__ void apic_wait_icr_idle(void) @@ -95,4 +96,7 @@ extern unsigned int nmi_watchdog; #endif /* CONFIG_X86_LOCAL_APIC */ +#define clustered_apic_mode 0 +#define esr_disable 0 + #endif /* __ASM_APIC_H */ diff --git a/xen/include/asm-x86_64/apicdef.h b/xen/include/asm-x86_64/apicdef.h index 227bfca652..8a787c3122 100644 --- a/xen/include/asm-x86_64/apicdef.h +++ b/xen/include/asm-x86_64/apicdef.h @@ -32,8 +32,6 @@ #define SET_APIC_LOGICAL_ID(x) (((x)<<24)) #define APIC_ALL_CPUS 0xFF #define APIC_DFR 0xE0 -#define APIC_DFR_CLUSTER 0x0FFFFFFFul /* Clustered */ -#define APIC_DFR_FLAT 0xFFFFFFFFul /* Flat mode */ #define APIC_SPIV 0xF0 #define APIC_SPIV_FOCUS_DISABLED (1<<9) #define 
APIC_SPIV_APIC_ENABLED (1<<8) @@ -59,7 +57,6 @@ #define APIC_INT_LEVELTRIG 0x08000 #define APIC_INT_ASSERT 0x04000 #define APIC_ICR_BUSY 0x01000 -#define APIC_DEST_PHYSICAL 0x00000 #define APIC_DEST_LOGICAL 0x00800 #define APIC_DM_FIXED 0x00000 #define APIC_DM_LOWEST 0x00100 @@ -110,19 +107,7 @@ #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) -#ifdef CONFIG_X86_CLUSTERED_APIC -#define MAX_IO_APICS 32 -#else -#define MAX_IO_APICS 8 -#endif - - -/* - * The broadcast ID is 0xF for old APICs and 0xFF for xAPICs. SAPICs - * don't broadcast (yet?), but if they did, they might use 0xFFFF. - */ -#define APIC_BROADCAST_ID_XAPIC (0xFF) -#define APIC_BROADCAST_ID_APIC (0x0F) +#define MAX_IO_APICS 16 /* * the local APIC register structure, memory mapped. Not terribly well diff --git a/xen/include/asm-x86_64/atomic.h b/xen/include/asm-x86_64/atomic.h index 9dcdca93f7..f4eb858312 100644 --- a/xen/include/asm-x86_64/atomic.h +++ b/xen/include/asm-x86_64/atomic.h @@ -186,6 +186,16 @@ static __inline__ int atomic_add_negative(int i, atomic_t *v) return c; } + +/* These are x86-specific, used by some header files */ +#define atomic_clear_mask(mask, addr) \ +__asm__ __volatile__(LOCK "andl %0,%1" \ +: : "r" (~(mask)),"m" (*addr) : "memory") + +#define atomic_set_mask(mask, addr) \ +__asm__ __volatile__(LOCK "orl %0,%1" \ +: : "r" ((unsigned)mask),"m" (*addr) : "memory") + /* Atomic operations are already serializing on x86 */ #define smp_mb__before_atomic_dec() barrier() #define smp_mb__after_atomic_dec() barrier() diff --git a/xen/include/asm-x86_64/bitops.h b/xen/include/asm-x86_64/bitops.h index 73bcd8ef5f..611bf3bf4b 100644 --- a/xen/include/asm-x86_64/bitops.h +++ b/xen/include/asm-x86_64/bitops.h @@ -1,5 +1,5 @@ -#ifndef _I386_BITOPS_H -#define _I386_BITOPS_H +#ifndef _X86_64_BITOPS_H +#define _X86_64_BITOPS_H /* * Copyright 1992, Linus Torvalds. 
@@ -33,12 +33,12 @@ * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static __inline__ void set_bit(int nr, volatile void * addr) +static __inline__ void set_bit(long nr, volatile void * addr) { __asm__ __volatile__( LOCK_PREFIX - "btsl %1,%0" + "btsq %1,%0" :"=m" (ADDR) - :"Ir" (nr)); + :"dIr" (nr)); } /** @@ -50,12 +50,12 @@ static __inline__ void set_bit(int nr, volatile void * addr) * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ -static __inline__ void __set_bit(int nr, volatile void * addr) +static __inline__ void __set_bit(long nr, volatile void * addr) { __asm__( - "btsl %1,%0" + "btsq %1,%0" :"=m" (ADDR) - :"Ir" (nr)); + :"dIr" (nr)); } /** @@ -68,12 +68,12 @@ static __inline__ void __set_bit(int nr, volatile void * addr) * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() * in order to ensure changes are visible on other processors. */ -static __inline__ void clear_bit(int nr, volatile void * addr) +static __inline__ void clear_bit(long nr, volatile void * addr) { __asm__ __volatile__( LOCK_PREFIX - "btrl %1,%0" + "btrq %1,%0" :"=m" (ADDR) - :"Ir" (nr)); + :"dIr" (nr)); } #define smp_mb__before_clear_bit() barrier() #define smp_mb__after_clear_bit() barrier() @@ -87,12 +87,12 @@ static __inline__ void clear_bit(int nr, volatile void * addr) * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. 
*/ -static __inline__ void __change_bit(int nr, volatile void * addr) +static __inline__ void __change_bit(long nr, volatile void * addr) { __asm__ __volatile__( - "btcl %1,%0" + "btcq %1,%0" :"=m" (ADDR) - :"Ir" (nr)); + :"dIr" (nr)); } /** @@ -104,12 +104,12 @@ static __inline__ void __change_bit(int nr, volatile void * addr) * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static __inline__ void change_bit(int nr, volatile void * addr) +static __inline__ void change_bit(long nr, volatile void * addr) { __asm__ __volatile__( LOCK_PREFIX - "btcl %1,%0" + "btcq %1,%0" :"=m" (ADDR) - :"Ir" (nr)); + :"dIr" (nr)); } /** @@ -120,14 +120,14 @@ static __inline__ void change_bit(int nr, volatile void * addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static __inline__ int test_and_set_bit(int nr, volatile void * addr) +static __inline__ int test_and_set_bit(long nr, volatile void * addr) { - int oldbit; + long oldbit; __asm__ __volatile__( LOCK_PREFIX - "btsl %2,%1\n\tsbbl %0,%0" + "btsq %2,%1\n\tsbbq %0,%0" :"=r" (oldbit),"=m" (ADDR) - :"Ir" (nr) : "memory"); + :"dIr" (nr) : "memory"); return oldbit; } @@ -140,14 +140,14 @@ static __inline__ int test_and_set_bit(int nr, volatile void * addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ -static __inline__ int __test_and_set_bit(int nr, volatile void * addr) +static __inline__ int __test_and_set_bit(long nr, volatile void * addr) { - int oldbit; + long oldbit; __asm__( - "btsl %2,%1\n\tsbbl %0,%0" + "btsq %2,%1\n\tsbbq %0,%0" :"=r" (oldbit),"=m" (ADDR) - :"Ir" (nr)); + :"dIr" (nr)); return oldbit; } @@ -159,14 +159,14 @@ static __inline__ int __test_and_set_bit(int nr, volatile void * addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. 
*/ -static __inline__ int test_and_clear_bit(int nr, volatile void * addr) +static __inline__ int test_and_clear_bit(long nr, volatile void * addr) { - int oldbit; + long oldbit; __asm__ __volatile__( LOCK_PREFIX - "btrl %2,%1\n\tsbbl %0,%0" + "btrq %2,%1\n\tsbbq %0,%0" :"=r" (oldbit),"=m" (ADDR) - :"Ir" (nr) : "memory"); + :"dIr" (nr) : "memory"); return oldbit; } @@ -179,26 +179,26 @@ static __inline__ int test_and_clear_bit(int nr, volatile void * addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ -static __inline__ int __test_and_clear_bit(int nr, volatile void * addr) +static __inline__ int __test_and_clear_bit(long nr, volatile void * addr) { - int oldbit; + long oldbit; __asm__( - "btrl %2,%1\n\tsbbl %0,%0" + "btrq %2,%1\n\tsbbq %0,%0" :"=r" (oldbit),"=m" (ADDR) - :"Ir" (nr)); + :"dIr" (nr)); return oldbit; } /* WARNING: non atomic and it can be reordered! */ -static __inline__ int __test_and_change_bit(int nr, volatile void * addr) +static __inline__ int __test_and_change_bit(long nr, volatile void * addr) { - int oldbit; + long oldbit; __asm__ __volatile__( - "btcl %2,%1\n\tsbbl %0,%0" + "btcq %2,%1\n\tsbbq %0,%0" :"=r" (oldbit),"=m" (ADDR) - :"Ir" (nr) : "memory"); + :"dIr" (nr) : "memory"); return oldbit; } @@ -210,31 +210,39 @@ static __inline__ int __test_and_change_bit(int nr, volatile void * addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. 
*/ -static __inline__ int test_and_change_bit(int nr, volatile void * addr) +static __inline__ int test_and_change_bit(long nr, volatile void * addr) { - int oldbit; + long oldbit; __asm__ __volatile__( LOCK_PREFIX - "btcl %2,%1\n\tsbbl %0,%0" + "btcq %2,%1\n\tsbbq %0,%0" :"=r" (oldbit),"=m" (ADDR) - :"Ir" (nr) : "memory"); + :"dIr" (nr) : "memory"); return oldbit; } +#if 0 /* Fool kernel-doc since it doesn't do macros yet */ +/** + * test_bit - Determine whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static int test_bit(int nr, const volatile void * addr); +#endif -static __inline__ int constant_test_bit(int nr, const volatile void * addr) +static __inline__ int constant_test_bit(long nr, const volatile void * addr) { return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; } -static __inline__ int variable_test_bit(int nr, volatile void * addr) +static __inline__ int variable_test_bit(long nr, volatile void * addr) { - int oldbit; + long oldbit; __asm__ __volatile__( - "btl %2,%1\n\tsbbl %0,%0" + "btq %2,%1\n\tsbbq %0,%0" :"=r" (oldbit) - :"m" (ADDR),"Ir" (nr)); + :"m" (ADDR),"dIr" (nr)); return oldbit; } @@ -246,10 +254,10 @@ static __inline__ int variable_test_bit(int nr, volatile void * addr) /** * find_first_zero_bit - find the first zero bit in a memory region * @addr: The address to start the search at - * @size: The maximum size to search + * @size: The maximum bitnumber to search * * Returns the bit-number of the first zero bit, not the number of the byte - * containing a bit. + * containing a bit. -1 when none found. */ static __inline__ int find_first_zero_bit(void * addr, unsigned size) { @@ -258,20 +266,19 @@ static __inline__ int find_first_zero_bit(void * addr, unsigned size) if (!size) return 0; - /* This looks at memory. 
Mark it volatile to tell gcc not to move it around */ __asm__ __volatile__( "movl $-1,%%eax\n\t" "xorl %%edx,%%edx\n\t" "repe; scasl\n\t" "je 1f\n\t" - "xorl -4(%%edi),%%eax\n\t" - "subl $4,%%edi\n\t" + "xorl -4(%%rdi),%%eax\n\t" + "subq $4,%%rdi\n\t" "bsfl %%eax,%%edx\n" - "1:\tsubl %%ebx,%%edi\n\t" - "shll $3,%%edi\n\t" - "addl %%edi,%%edx" + "1:\tsubq %%rbx,%%rdi\n\t" + "shlq $3,%%rdi\n\t" + "addq %%rdi,%%rdx" :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2) - :"1" ((size + 31) >> 5), "2" (addr), "b" (addr)); + :"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory"); return res; } @@ -283,7 +290,7 @@ static __inline__ int find_first_zero_bit(void * addr, unsigned size) */ static __inline__ int find_next_zero_bit (void * addr, int size, int offset) { - unsigned long * p = ((unsigned long *) addr) + (offset >> 5); + unsigned int * p = ((unsigned int *) addr) + (offset >> 5); int set = 0, bit = offset & 31, res; if (bit) { @@ -304,10 +311,36 @@ static __inline__ int find_next_zero_bit (void * addr, int size, int offset) /* * No zero yet, search remaining full bytes for a zero */ - res = find_first_zero_bit (p, size - 32 * (p - (unsigned long *) addr)); + res = find_first_zero_bit (p, size - 32 * (p - (unsigned int *) addr)); return (offset + set + res); } +/* + * Find string of zero bits in a bitmap. -1 when not found. + */ +extern unsigned long +find_next_zero_string(unsigned long *bitmap, long start, long nbits, int len); + +static inline void set_bit_string(unsigned long *bitmap, unsigned long i, + int len) +{ + unsigned long end = i + len; + while (i < end) { + __set_bit(i, bitmap); + i++; + } +} + +static inline void clear_bit_string(unsigned long *bitmap, unsigned long i, + int len) +{ + unsigned long end = i + len; + while (i < end) { + clear_bit(i, bitmap); + i++; + } +} + /** * ffz - find first zero in word. 
* @word: The word to search @@ -316,12 +349,13 @@ static __inline__ int find_next_zero_bit (void * addr, int size, int offset) */ static __inline__ unsigned long ffz(unsigned long word) { - __asm__("bsfl %1,%0" + __asm__("bsfq %1,%0" :"=r" (word) :"r" (~word)); return word; } + /** * ffs - find first bit set * @x: the word to search @@ -352,6 +386,8 @@ static __inline__ int ffs(int x) #define hweight16(x) generic_hweight16(x) #define hweight8(x) generic_hweight8(x) + + #define ext2_set_bit __test_and_set_bit #define ext2_clear_bit __test_and_clear_bit #define ext2_test_bit test_bit @@ -365,4 +401,5 @@ static __inline__ int ffs(int x) #define minix_test_bit(nr,addr) test_bit(nr,addr) #define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size) -#endif /* _I386_BITOPS_H */ + +#endif /* _X86_64_BITOPS_H */ diff --git a/xen/include/asm-x86_64/byteorder.h b/xen/include/asm-x86_64/byteorder.h index bbfb629fae..daef6afb4f 100644 --- a/xen/include/asm-x86_64/byteorder.h +++ b/xen/include/asm-x86_64/byteorder.h @@ -1,47 +1,32 @@ -#ifndef _I386_BYTEORDER_H -#define _I386_BYTEORDER_H +#ifndef _X86_64_BYTEORDER_H +#define _X86_64_BYTEORDER_H #include #ifdef __GNUC__ -/* For avoiding bswap on i386 */ -#ifdef __KERNEL__ -#include -#endif - -static __inline__ __const__ __u32 ___arch__swab32(__u32 x) +static __inline__ __const__ __u64 ___arch__swab64(__u64 x) { -#ifdef CONFIG_X86_BSWAP - __asm__("bswap %0" : "=r" (x) : "0" (x)); -#else - __asm__("xchgb %b0,%h0\n\t" /* swap lower bytes */ - "rorl $16,%0\n\t" /* swap words */ - "xchgb %b0,%h0" /* swap higher bytes */ - :"=q" (x) - : "0" (x)); -#endif + __asm__("bswapq %0" : "=r" (x) : "0" (x)); return x; } -static __inline__ __const__ __u16 ___arch__swab16(__u16 x) +static __inline__ __const__ __u32 ___arch__swab32(__u32 x) { - __asm__("xchgb %b0,%h0" /* swap bytes */ \ - : "=q" (x) \ - : "0" (x)); \ - return x; + __asm__("bswapl %0" : "=r" (x) : "0" (x)); + return x; } -#define __arch__swab32(x) ___arch__swab32(x) 
-#define __arch__swab16(x) ___arch__swab16(x) +/* Do not define swab16. Gcc is smart enought to recognize "C" version and + convert it into rotation or exhange. */ -#if !defined(__STRICT_ANSI__) || defined(__KERNEL__) -# define __BYTEORDER_HAS_U64__ -# define __SWAB_64_THRU_32__ -#endif +#define __arch__swab32(x) ___arch__swab32(x) +#define __arch__swab64(x) ___arch__swab64(x) #endif /* __GNUC__ */ +#define __BYTEORDER_HAS_U64__ + #include -#endif /* _I386_BYTEORDER_H */ +#endif /* _X86_64_BYTEORDER_H */ diff --git a/xen/include/asm-x86_64/cache.h b/xen/include/asm-x86_64/cache.h index 502c8ba7a6..1def45b0b7 100644 --- a/xen/include/asm-x86_64/cache.h +++ b/xen/include/asm-x86_64/cache.h @@ -1,8 +1,8 @@ /* - * include/asm-i386/cache.h + * include/asm-x8664/cache.h */ -#ifndef __ARCH_I386_CACHE_H -#define __ARCH_I386_CACHE_H +#ifndef __ARCH_X8664_CACHE_H +#define __ARCH_X8664_CACHE_H #include diff --git a/xen/include/asm-x86_64/config.h b/xen/include/asm-x86_64/config.h index bfd99a0725..fe622e88a5 100644 --- a/xen/include/asm-x86_64/config.h +++ b/xen/include/asm-x86_64/config.h @@ -4,8 +4,8 @@ * A Linux-style configuration list. */ -#ifndef __XENO_CONFIG_H__ -#define __XENO_CONFIG_H__ +#ifndef __XENO_X86_64_CONFIG_H__ +#define __XENO_X86_64_CONFIG_H__ #define CONFIG_X86 1 @@ -53,9 +53,18 @@ #define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) #define ____cacheline_aligned __cacheline_aligned -/*** Hypervisor owns top 64MB of virtual address space. ***/ -#define HYPERVISOR_VIRT_START (0xFC000000UL) - +/* + * Virtual addresses beyond this are not modifiable by guest OSes. The + * machine->physical mapping table starts at this address, read-only. + */ +#define HYPERVISOR_VIRT_START (0xFFFF800000000000ULL) + +/* + * Xen exists in the highest 2GB of address space for RIP-relative + * addressing + */ +#define XEN_VIRT_START (0xFFFFFFFF80000000ULL) + /* * First 4MB are mapped read-only for all. 
It's for the machine->physical * mapping table (MPT table). The following are virtual addresses. @@ -130,8 +139,12 @@ #define barrier() __asm__ __volatile__("": : :"memory") -#define __HYPERVISOR_CS 0x0808 -#define __HYPERVISOR_DS 0x0810 +/* + * Hypervisor segment selectors + */ +#define __HYPERVISOR_CS64 0x0810 +#define __HYPERVISOR_CS32 0x0808 +#define __HYPERVISOR_DS 0x0818 #define NR_syscalls 256 @@ -167,4 +180,4 @@ extern unsigned int opt_ser_baud; #endif /* __ASSEMBLY__ */ -#endif /* __XENO_CONFIG_H__ */ +#endif /* __XENO_X86_64_CONFIG_H__ */ diff --git a/xen/include/asm-x86_64/cpufeature.h b/xen/include/asm-x86_64/cpufeature.h index 85b8b43974..7d9f90e813 100644 --- a/xen/include/asm-x86_64/cpufeature.h +++ b/xen/include/asm-x86_64/cpufeature.h @@ -4,8 +4,8 @@ * Defines x86 CPU feature bits */ -#ifndef __ASM_I386_CPUFEATURE_H -#define __ASM_I386_CPUFEATURE_H +#ifndef __ASM_X8664_CPUFEATURE_H +#define __ASM_X8664_CPUFEATURE_H /* Sample usage: CPU_FEATURE_P(cpu.x86_capability, FPU) */ #define CPU_FEATURE_P(CAP, FEATURE) test_bit(CAP, X86_FEATURE_##FEATURE ##_BIT) @@ -40,7 +40,6 @@ #define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ #define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ #define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */ -#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */ #define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */ #define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */ @@ -64,9 +63,7 @@ #define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */ #define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ -#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability) - -#endif /* __ASM_I386_CPUFEATURE_H */ +#endif /* __ASM_X8664_CPUFEATURE_H */ /* * Local Variables: diff --git a/xen/include/asm-x86_64/current.h b/xen/include/asm-x86_64/current.h index ee5b4b8516..dd288ca8b3 100644 --- a/xen/include/asm-x86_64/current.h +++ b/xen/include/asm-x86_64/current.h @@ -1,16 
+1,18 @@ -#ifndef _I386_CURRENT_H -#define _I386_CURRENT_H +#ifndef _X86_64_CURRENT_H +#define _X86_64_CURRENT_H +#if !defined(__ASSEMBLY__) struct task_struct; +#include + #define STACK_RESERVED \ - (sizeof(execution_context_t) + sizeof(struct task_struct *)) + (sizeof(execution_context_t)) static inline struct task_struct * get_current(void) { struct task_struct *current; - __asm__ ( "orl %%esp,%0; andl $~3,%0; movl (%0),%0" - : "=r" (current) : "0" (STACK_SIZE-4) ); + current = read_pda(pcurrent); return current; } @@ -18,30 +20,29 @@ static inline struct task_struct * get_current(void) static inline void set_current(struct task_struct *p) { - __asm__ ( "orl %%esp,%0; andl $~3,%0; movl %1,(%0)" - : : "r" (STACK_SIZE-4), "r" (p) ); + write_pda(pcurrent,p); } static inline execution_context_t *get_execution_context(void) { execution_context_t *execution_context; - __asm__ ( "andl %%esp,%0; addl %2,%0" - : "=r" (execution_context) - : "0" (~(STACK_SIZE-1)), "i" (STACK_SIZE-STACK_RESERVED) ); + __asm__( "andq %%rsp,%0; addl %2,%0" + : "=r" (execution_context) + : "0" (~(STACK_SIZE-1)), "i" (STACK_SIZE-STACK_RESERVED) ); return execution_context; } static inline unsigned long get_stack_top(void) { unsigned long p; - __asm__ ( "orl %%esp,%0; andl $~3,%0" - : "=r" (p) : "0" (STACK_SIZE-4) ); + __asm__ ( "orq %%rsp,%0; andq $~7,%0" + : "=r" (p) : "0" (STACK_SIZE-8) ); return p; } #define schedule_tail(_p) \ __asm__ __volatile__ ( \ - "andl %%esp,%0; addl %2,%0; movl %0,%%esp; jmp *%1" \ + "andq %%rsp,%0; addq %2,%0; movl %0,%%rsp; jmp *%1" \ : : "r" (~(STACK_SIZE-1)), \ "r" (unlikely(is_idle_task((_p))) ? 
\ continue_cpu_idle_loop : \ @@ -49,4 +50,14 @@ static inline unsigned long get_stack_top(void) "i" (STACK_SIZE-STACK_RESERVED) ) -#endif /* !(_I386_CURRENT_H) */ +#else + +#ifndef ASM_OFFSET_H +#include +#endif + +#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg + +#endif + +#endif /* !(_X86_64_CURRENT_H) */ diff --git a/xen/include/asm-x86_64/debugreg.h b/xen/include/asm-x86_64/debugreg.h index f0b2b06ae0..bd1aab1d8c 100644 --- a/xen/include/asm-x86_64/debugreg.h +++ b/xen/include/asm-x86_64/debugreg.h @@ -1,5 +1,5 @@ -#ifndef _I386_DEBUGREG_H -#define _I386_DEBUGREG_H +#ifndef _X86_64_DEBUGREG_H +#define _X86_64_DEBUGREG_H /* Indicate the register numbers for a number of the specific @@ -38,6 +38,7 @@ #define DR_LEN_1 (0x0) /* Settings for data length to trap on */ #define DR_LEN_2 (0x4) #define DR_LEN_4 (0xC) +#define DR_LEN_8 (0x8) /* The low byte to the control register determine which registers are enabled. There are 4 fields of two bits. One bit is "local", meaning @@ -57,7 +58,7 @@ We can slow the instruction pipeline for instructions coming via the gdt or the ldt if we want to. 
I am not sure why this is an advantage */ -#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */ +#define DR_CONTROL_RESERVED (0xFFFFFFFF0000FC00UL) /* Reserved */ #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ diff --git a/xen/include/asm-x86_64/delay.h b/xen/include/asm-x86_64/delay.h index 9e0adb4a27..a04cdb4346 100644 --- a/xen/include/asm-x86_64/delay.h +++ b/xen/include/asm-x86_64/delay.h @@ -1,5 +1,5 @@ -#ifndef _I386_DELAY_H -#define _I386_DELAY_H +#ifndef _X86_64_DELAY_H +#define _X86_64_DELAY_H /* * Copyright (C) 1993 Linus Torvalds @@ -11,4 +11,4 @@ extern unsigned long ticks_per_usec; extern void __udelay(unsigned long usecs); #define udelay(n) __udelay(n) -#endif /* defined(_I386_DELAY_H) */ +#endif /* defined(_X86_64_DELAY_H) */ diff --git a/xen/include/asm-x86_64/desc.h b/xen/include/asm-x86_64/desc.h index 780f9c8728..ef864de036 100644 --- a/xen/include/asm-x86_64/desc.h +++ b/xen/include/asm-x86_64/desc.h @@ -1,17 +1,19 @@ #ifndef __ARCH_DESC_H #define __ARCH_DESC_H -#define LDT_ENTRY_SIZE 8 +#define LDT_ENTRY_SIZE 16 #define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY -#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8) -#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 1) +#define __FIRST_PER_CPU_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8) -#define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY) +#define __CPU_DESC_INDEX(x,field) \ + ((x) * sizeof(struct per_cpu_gdt) + offsetof(struct per_cpu_gdt, field) + (FIRST_PER_CPU_ENTRY*8)) #define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY) -#define load_TR(n) __asm__ __volatile__ ("ltr %%ax" : : "a" (__TSS(n)<<3) ) +#define load_TR(cpu) asm volatile("ltr %w0"::"r" (__CPU_DESC_INDEX(cpu, tss))); +#define __load_LDT(cpu) asm volatile("lldt %w0"::"r" (__CPU_DESC_INDEX(cpu, ldt))); +#define clear_LDT(n) asm volatile("lldt %w0"::"r" (0)) /* * Guest OS must provide its own code selectors, or use the one we provide. 
The @@ -24,8 +26,8 @@ (((((_s)>>3) < FIRST_RESERVED_GDT_ENTRY) || \ (((_s)>>3) > LAST_RESERVED_GDT_ENTRY) || \ ((_s)&4)) && \ - (((_s)&3) == 1)) -#define VALID_CODESEL(_s) ((_s) == FLAT_RING1_CS || VALID_SEL(_s)) + (((_s)&3) == 0)) +#define VALID_CODESEL(_s) ((_s) == FLAT_RING3_CS || VALID_SEL(_s)) /* These are bitmasks for the first 32 bits of a descriptor table entry. */ #define _SEGMENT_TYPE (15<< 8) @@ -35,17 +37,75 @@ #define _SEGMENT_G ( 1<<23) /* Granularity */ #ifndef __ASSEMBLY__ -struct desc_struct { - unsigned long a,b; + +enum { + GATE_INTERRUPT = 0xE, + GATE_TRAP = 0xF, + GATE_CALL = 0xC, +}; + +// 16byte gate +struct gate_struct { + u16 offset_low; + u16 segment; + unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1; + u16 offset_middle; + u32 offset_high; + u32 zero1; +} __attribute__((packed)); + +// 8 byte segment descriptor +struct desc_struct { + u16 limit0; + u16 base0; + unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1; + unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8; +} __attribute__((packed)); + +// LDT or TSS descriptor in the GDT. 16 bytes. 
+struct ldttss_desc { + u16 limit0; + u16 base0; + unsigned base1 : 8, type : 5, dpl : 2, p : 1; + unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; + u32 base3; + u32 zero1; +} __attribute__((packed)); + +// Union of above structures +union desc_union { + struct desc_struct seg; + struct ldttss_desc ldttss; + struct gate_struct gate; }; -extern struct desc_struct gdt_table[]; -extern struct desc_struct *idt, *gdt; +struct per_cpu_gdt { + struct ldttss_desc tss; + struct ldttss_desc ldt; +} ____cacheline_aligned; + struct Xgt_desc_struct { unsigned short size; - unsigned long address __attribute__((packed)); -}; + unsigned long address; +} __attribute__((packed)); + +extern __u8 gdt_table[]; +extern __u8 gdt_end[]; +extern union desc_union *gdt; + +extern struct per_cpu_gdt gdt_cpu_table[]; + +#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF) +#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF) +#define PTR_HIGH(x) ((unsigned long)(x) >> 32) + +enum { + DESC_TSS = 0x9, + DESC_LDT = 0x2, +}; + +extern struct gate_struct *idt; #define idt_descr (*(struct Xgt_desc_struct *)((char *)&idt - 2)) #define gdt_descr (*(struct Xgt_desc_struct *)((char *)&gdt - 2)) diff --git a/xen/include/asm-x86_64/domain_page.h b/xen/include/asm-x86_64/domain_page.h index 1d38d8b310..026fb251be 100644 --- a/xen/include/asm-x86_64/domain_page.h +++ b/xen/include/asm-x86_64/domain_page.h @@ -9,21 +9,19 @@ #include #include - -extern unsigned long *mapcache; -#define MAPCACHE_ENTRIES 1024 +#include /* * Maps a given physical address, returning corresponding virtual address. * The entire page containing that VA is now accessible until a * corresponding call to unmap_domain_mem(). */ -extern void *map_domain_mem(unsigned long pa); +#define map_domain_mem(pa) __va(pa) /* * Pass a VA within a page previously mapped with map_domain_mem(). * That page will then be removed from the mapping lists. 
*/ -extern void unmap_domain_mem(void *va); +#define unmap_domain_mem(va) {} #endif /* __ASM_DOMAIN_PAGE_H__ */ diff --git a/xen/include/asm-x86_64/fixmap.h b/xen/include/asm-x86_64/fixmap.h index b0f455a5af..4aabcd5d61 100644 --- a/xen/include/asm-x86_64/fixmap.h +++ b/xen/include/asm-x86_64/fixmap.h @@ -74,7 +74,7 @@ extern void __set_fixmap (enum fixed_addresses idx, * the start of the fixmap, and leave one page empty * at the top of mem.. */ -#define FIXADDR_TOP (0xffffe000UL) +#define FIXADDR_TOP (0xffffffffffffe000UL) #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) diff --git a/xen/include/asm-x86_64/hdreg.h b/xen/include/asm-x86_64/hdreg.h index 1ad5c07394..18561aaed3 100644 --- a/xen/include/asm-x86_64/hdreg.h +++ b/xen/include/asm-x86_64/hdreg.h @@ -1,12 +1,13 @@ /* - * linux/include/asm-i386/hdreg.h + * linux/include/asm-x86_64/hdreg.h * * Copyright (C) 1994-1996 Linus Torvalds & authors */ -#ifndef __ASMi386_HDREG_H -#define __ASMi386_HDREG_H +#ifndef __ASMx86_64_HDREG_H +#define __ASMx86_64_HDREG_H -typedef unsigned short ide_ioreg_t; +//typedef unsigned short ide_ioreg_t; +typedef unsigned long ide_ioreg_t; -#endif /* __ASMi386_HDREG_H */ +#endif /* __ASMx86_64_HDREG_H */ diff --git a/xen/include/asm-x86_64/ide.h b/xen/include/asm-x86_64/ide.h index 6642abf467..1281bb3b4f 100644 --- a/xen/include/asm-x86_64/ide.h +++ b/xen/include/asm-x86_64/ide.h @@ -1,15 +1,15 @@ /* - * linux/include/asm-i386/ide.h + * linux/include/asm-x86_64/ide.h * * Copyright (C) 1994-1996 Linus Torvalds & authors */ /* - * This file contains the i386 architecture specific IDE code. + * This file contains the x86_64 architecture specific IDE code. 
*/ -#ifndef __ASMi386_IDE_H -#define __ASMi386_IDE_H +#ifndef __ASMx86_64_IDE_H +#define __ASMx86_64_IDE_H #ifdef __KERNEL__ @@ -23,8 +23,6 @@ # endif #endif -#define ide__sti() __sti() - static __inline__ int ide_default_irq(ide_ioreg_t base) { switch (base) { @@ -79,6 +77,7 @@ static __inline__ void ide_init_default_hwifs(void) int index; for(index = 0; index < MAX_HWIFS; index++) { + memset(&hw, 0, sizeof hw); ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL); hw.irq = ide_default_irq(ide_default_io_base(index)); ide_register_hw(&hw, NULL); @@ -125,4 +124,4 @@ typedef union { #endif /* __KERNEL__ */ -#endif /* __ASMi386_IDE_H */ +#endif /* __ASMx86_64_IDE_H */ diff --git a/xen/include/asm-x86_64/io.h b/xen/include/asm-x86_64/io.h index 8260e8de95..914ee889d6 100644 --- a/xen/include/asm-x86_64/io.h +++ b/xen/include/asm-x86_64/io.h @@ -1,18 +1,115 @@ #ifndef _ASM_IO_H #define _ASM_IO_H -#include -#include +#include -#define IO_SPACE_LIMIT 0xffff +/* + * This file contains the definitions for the x86 IO instructions + * inb/inw/inl/outb/outw/outl and the "string versions" of the same + * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing" + * versions of the single-IO instructions (inb_p/inw_p/..). + * + * This file is not meant to be obfuscating: it's just complicated + * to (a) handle it all in a way that makes gcc able to optimize it + * as well as possible and (b) trying to avoid writing the same thing + * over and over again with slight variations and possibly making a + * mistake somewhere. + */ + +/* + * Thanks to James van Artsdalen for a better timing-fix than + * the two short jumps: using outb's to a nonexistent port seems + * to guarantee better timings even on fast machines. 
+ * + * On the other hand, I'd like to be sure of a non-existent port: + * I feel a bit unsafe about using 0x80 (should be safe, though) + * + * Linus + */ + + /* + * Bit simplified and optimized by Jan Hubicka + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. + * + * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added, + * isa_read[wl] and isa_write[wl] fixed + * - Arnaldo Carvalho de Melo + */ -/*#include */ +#ifdef SLOW_IO_BY_JUMPING +#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:" +#else +#define __SLOW_DOWN_IO "\noutb %%al,$0x80" +#endif + +#ifdef REALLY_SLOW_IO +#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO +#else +#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO +#endif + +/* + * Talk about misusing macros.. + */ +#define __OUT1(s,x) \ +extern inline void out##s(unsigned x value, unsigned short port) { + +#define __OUT2(s,s1,s2) \ +__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" + +#define __OUT(s,s1,x) \ +__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ +__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \ + +#define __IN1(s) \ +extern inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v; + +#define __IN2(s,s1,s2) \ +__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" + +#define __IN(s,s1,i...) 
\ +__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ +__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ + +#define __INS(s) \ +extern inline void ins##s(unsigned short port, void * addr, unsigned long count) \ +{ __asm__ __volatile__ ("rep ; ins" #s \ +: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } + +#define __OUTS(s) \ +extern inline void outs##s(unsigned short port, const void * addr, unsigned long count) \ +{ __asm__ __volatile__ ("rep ; outs" #s \ +: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } + +#define RETURN_TYPE unsigned char +__IN(b,"") +#undef RETURN_TYPE +#define RETURN_TYPE unsigned short +__IN(w,"") +#undef RETURN_TYPE +#define RETURN_TYPE unsigned int +__IN(l,"") +#undef RETURN_TYPE + +__OUT(b,"b",char) +__OUT(w,"w",short) +__OUT(l,,int) + +__INS(b) +__INS(w) +__INS(l) + +__OUTS(b) +__OUTS(w) +__OUTS(l) + +#define IO_SPACE_LIMIT 0xffff /* * Temporary debugging check to catch old code using * unmapped ISA addresses. Will be removed in 2.4. */ -#if CONFIG_DEBUG_IOVIRT +#ifdef CONFIG_IO_DEBUG extern void *__io_virt_debug(unsigned long x, const char *file, int line); extern unsigned long __io_phys_debug(unsigned long x, const char *file, int line); #define __io_virt(x) __io_virt_debug((unsigned long)(x), __FILE__, __LINE__) @@ -22,59 +119,32 @@ //#define __io_phys(x) __pa(x) #endif - -/** - * virt_to_phys - map virtual addresses to physical - * @address: address to remap - * - * The returned physical address is the physical (CPU) mapping for - * the memory address given. It is only valid to use this function on - * addresses directly mapped or allocated via kmalloc. - * - * This function does not give bus mappings for DMA transfers. In - * almost all conceivable cases a device driver should not be using - * this function +/* + * Change virtual addresses to physical addresses and vv. 
+ * These are pretty trivial */ - -static inline unsigned long virt_to_phys(volatile void * address) +extern inline unsigned long virt_to_phys(volatile void * address) { - return __pa(address); + return __pa(address); } -/** - * phys_to_virt - map physical address to virtual - * @address: address to remap - * - * The returned virtual address is a current CPU mapping for - * the memory address given. It is only valid to use this function on - * addresses that have a kernel mapping - * - * This function does not handle bus mappings for DMA transfers. In - * almost all conceivable cases a device driver should not be using - * this function - */ - -static inline void * phys_to_virt(unsigned long address) +extern inline void * phys_to_virt(unsigned long address) { - return __va(address); + return __va(address); } /* - * Change "struct pfn_info" to physical address. + * Change "struct page" to physical address. */ -#ifdef CONFIG_HIGHMEM64G -#define page_to_phys(page) ((u64)(page - frame_table) << PAGE_SHIFT) +#ifdef CONFIG_DISCONTIGMEM +#include #else -#define page_to_phys(page) ((page - frame_table) << PAGE_SHIFT) +#define page_to_phys(page) (((page) - frame_table) << PAGE_SHIFT) #endif -#define page_to_pfn(_page) ((unsigned long)((_page) - frame_table)) -#define page_to_virt(_page) phys_to_virt(page_to_phys(_page)) - - extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags); -static inline void * ioremap (unsigned long offset, unsigned long size) +extern inline void * ioremap (unsigned long offset, unsigned long size) { return __ioremap(offset, size, 0); } @@ -84,7 +154,7 @@ static inline void * ioremap (unsigned long offset, unsigned long size) * it's useful if some control registers are in such an area and write combining * or read caching is not desirable: */ -static inline void * ioremap_nocache (unsigned long offset, unsigned long size) +extern inline void * ioremap_nocache (unsigned long offset, unsigned long size) { return 
__ioremap(offset, size, _PAGE_PCD); } @@ -108,20 +178,25 @@ extern void iounmap(void *addr); #define readb(addr) (*(volatile unsigned char *) __io_virt(addr)) #define readw(addr) (*(volatile unsigned short *) __io_virt(addr)) #define readl(addr) (*(volatile unsigned int *) __io_virt(addr)) +#define readq(addr) (*(volatile unsigned long *) __io_virt(addr)) #define __raw_readb readb #define __raw_readw readw #define __raw_readl readl +#define __raw_readq readq #define writeb(b,addr) (*(volatile unsigned char *) __io_virt(addr) = (b)) #define writew(b,addr) (*(volatile unsigned short *) __io_virt(addr) = (b)) #define writel(b,addr) (*(volatile unsigned int *) __io_virt(addr) = (b)) +#define writeq(b,addr) (*(volatile unsigned long *) __io_virt(addr) = (b)) #define __raw_writeb writeb #define __raw_writew writew #define __raw_writel writel +#define __raw_writeq writeq + +void *memcpy_fromio(void*,const void*,unsigned); +void *memcpy_toio(void*,const void*,unsigned); #define memset_io(a,b,c) memset(__io_virt(a),(b),(c)) -#define memcpy_fromio(a,b,c) memcpy((a),__io_virt(b),(c)) -#define memcpy_toio(a,b,c) memcpy(__io_virt(a),(b),(c)) /* * ISA space is 'always mapped' on a typical x86 system, no need to @@ -145,7 +220,7 @@ extern void iounmap(void *addr); /* - * Again, i386 does not require mem IO specific function. + * Again, x86-64 does not require mem IO specific function. */ #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(b),(c),(d)) @@ -183,102 +258,12 @@ out: return retval; } -/* - * Cache management - * - * This needed for two cases - * 1. Out of order aware processors - * 2. 
Accidentally out of order processors (PPro errata #51) - */ - -#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) - -static inline void flush_write_buffers(void) -{ - __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory"); -} - -#define dma_cache_inv(_start,_size) flush_write_buffers() -#define dma_cache_wback(_start,_size) flush_write_buffers() -#define dma_cache_wback_inv(_start,_size) flush_write_buffers() - -#else - /* Nothing to do */ #define dma_cache_inv(_start,_size) do { } while (0) #define dma_cache_wback(_start,_size) do { } while (0) #define dma_cache_wback_inv(_start,_size) do { } while (0) -#define flush_write_buffers() - -#endif - -#ifdef SLOW_IO_BY_JUMPING -#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:" -#else -#define __SLOW_DOWN_IO "\noutb %%al,$0x80" -#endif - -#ifdef REALLY_SLOW_IO -#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO -#else -#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO -#endif - - -/* - * Talk about misusing macros.. - */ -#define __OUT1(s,x) \ -static inline void out##s(unsigned x value, unsigned short port) { - -#define __OUT2(s,s1,s2) \ -__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" - -#define __OUT(s,s1,x) \ -__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ -__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} - -#define __IN1(s) \ -static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v; - -#define __IN2(s,s1,s2) \ -__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" - -#define __IN(s,s1,i...) 
\ -__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ -__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } -#define __INS(s) \ -static inline void ins##s(unsigned short port, void * addr, unsigned long count) \ -{ __asm__ __volatile__ ("rep ; ins" #s \ -: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } - -#define __OUTS(s) \ -static inline void outs##s(unsigned short port, const void * addr, unsigned long count) \ -{ __asm__ __volatile__ ("rep ; outs" #s \ -: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } - -#define RETURN_TYPE unsigned char -__IN(b,"") -#undef RETURN_TYPE -#define RETURN_TYPE unsigned short -__IN(w,"") -#undef RETURN_TYPE -#define RETURN_TYPE unsigned int -__IN(l,"") -#undef RETURN_TYPE - -__OUT(b,"b",char) -__OUT(w,"w",short) -__OUT(l,,int) - -__INS(b) -__INS(w) -__INS(l) - -__OUTS(b) -__OUTS(w) -__OUTS(l) +#define flush_write_buffers() #endif diff --git a/xen/include/asm-x86_64/io_apic.h b/xen/include/asm-x86_64/io_apic.h index 44916209a8..8227cdb976 100644 --- a/xen/include/asm-x86_64/io_apic.h +++ b/xen/include/asm-x86_64/io_apic.h @@ -97,7 +97,7 @@ extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; extern int mp_irq_entries; /* MP IRQ source entries */ -extern struct mpc_config_intsrc *mp_irqs; +extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* non-0 if default (table-less) MP configuration */ extern int mpc_default_type; diff --git a/xen/include/asm-x86_64/ioctl.h b/xen/include/asm-x86_64/ioctl.h index c75f20ade6..609b663b6b 100644 --- a/xen/include/asm-x86_64/ioctl.h +++ b/xen/include/asm-x86_64/ioctl.h @@ -1,10 +1,10 @@ -/* $Id: ioctl.h,v 1.5 1993/07/19 21:53:50 root Exp root $ +/* $Id: ioctl.h,v 1.2 2001/07/04 09:08:13 ak Exp $ * * linux/ioctl.h for Linux by H.H. Bergman. 
*/ -#ifndef _ASMI386_IOCTL_H -#define _ASMI386_IOCTL_H +#ifndef _ASMX8664_IOCTL_H +#define _ASMX8664_IOCTL_H /* ioctl command encoding: 32 bits total, command in lower 16 bits, * size of the parameter structure in the lower 14 bits of the @@ -72,4 +72,4 @@ #define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT) #define IOCSIZE_SHIFT (_IOC_SIZESHIFT) -#endif /* _ASMI386_IOCTL_H */ +#endif /* _ASMX8664_IOCTL_H */ diff --git a/xen/include/asm-x86_64/irq.h b/xen/include/asm-x86_64/irq.h index d88429df4a..b2728d1b55 100644 --- a/xen/include/asm-x86_64/irq.h +++ b/xen/include/asm-x86_64/irq.h @@ -22,9 +22,14 @@ extern void enable_irq(unsigned int); * IDT vectors usable for external interrupt sources start * at 0x20: */ +#define NR_VECTORS 256 #define FIRST_EXTERNAL_VECTOR 0x30 -#define NR_IRQS (256 - FIRST_EXTERNAL_VECTOR) +#ifdef CONFIG_X86_IO_APIC +#define NR_IRQS 224 +#else +#define NR_IRQS 16 +#endif #define HYPERVISOR_CALL_VECTOR 0x82 @@ -47,6 +52,7 @@ extern void enable_irq(unsigned int); #define EVENT_CHECK_VECTOR 0xfc #define CALL_FUNCTION_VECTOR 0xfb #define KDB_VECTOR 0xfa +#define TASK_MIGRATION_VECTOR 0xf9 /* * Local APIC timer IRQ vector is on a different priority level, @@ -100,89 +106,15 @@ extern char _stext, _etext; #define __STR(x) #x #define STR(x) __STR(x) -#define SAVE_ALL \ - "cld\n\t" \ - "pushl %gs\n\t" \ - "pushl %fs\n\t" \ - "pushl %es\n\t" \ - "pushl %ds\n\t" \ - "pushl %eax\n\t" \ - "pushl %ebp\n\t" \ - "pushl %edi\n\t" \ - "pushl %esi\n\t" \ - "pushl %edx\n\t" \ - "pushl %ecx\n\t" \ - "pushl %ebx\n\t" \ - "movl $" STR(__HYPERVISOR_DS) ",%edx\n\t" \ - "movl %edx,%ds\n\t" \ - "movl %edx,%es\n\t" \ - "movl %edx,%fs\n\t" \ - "movl %edx,%gs\n\t" - #define IRQ_NAME2(nr) nr##_interrupt(void) #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) -/* - * SMP has a few special interrupts for IPI messages - */ - - /* there is a second layer of macro just to get the symbolic - name for the vector evaluated. 
This change is for RTLinux */ -#define BUILD_SMP_INTERRUPT(x,v) XBUILD_SMP_INTERRUPT(x,v) -#define XBUILD_SMP_INTERRUPT(x,v)\ -asmlinkage void x(void); \ -asmlinkage void call_##x(void); \ -__asm__( \ -"\n"__ALIGN_STR"\n" \ -SYMBOL_NAME_STR(x) ":\n\t" \ - "pushl $"#v"-256\n\t" \ - SAVE_ALL \ - SYMBOL_NAME_STR(call_##x)":\n\t" \ - "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \ - "jmp ret_from_intr\n"); - -#define BUILD_SMP_TIMER_INTERRUPT(x,v) XBUILD_SMP_TIMER_INTERRUPT(x,v) -#define XBUILD_SMP_TIMER_INTERRUPT(x,v) \ -asmlinkage void x(struct pt_regs * regs); \ -asmlinkage void call_##x(void); \ -__asm__( \ -"\n"__ALIGN_STR"\n" \ -SYMBOL_NAME_STR(x) ":\n\t" \ - "pushl $"#v"-256\n\t" \ - SAVE_ALL \ - "movl %esp,%eax\n\t" \ - "pushl %eax\n\t" \ - SYMBOL_NAME_STR(call_##x)":\n\t" \ - "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \ - "addl $4,%esp\n\t" \ - "jmp ret_from_intr\n"); - -#define BUILD_COMMON_IRQ() \ -asmlinkage void call_do_IRQ(void); \ -__asm__( \ - "\n" __ALIGN_STR"\n" \ - "common_interrupt:\n\t" \ - SAVE_ALL \ - SYMBOL_NAME_STR(call_do_IRQ)":\n\t" \ - "call " SYMBOL_NAME_STR(do_IRQ) "\n\t" \ - "jmp ret_from_intr\n"); - -/* - * subtle. orig_eax is used by the signal code to distinct between - * system calls and interrupted 'random user-space'. Thus we have - * to put a negative value into orig_eax here. (the problem is that - * both system calls and IRQs want to have small integer numbers in - * orig_eax, and the syscall code has won the optimization conflict ;) - * - * Subtle as a pigs ear. 
VY - */ - #define BUILD_IRQ(nr) \ asmlinkage void IRQ_NAME(nr); \ __asm__( \ -"\n"__ALIGN_STR"\n" \ +"\n.p2align\n" \ SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \ - "pushl $"#nr"-256\n\t" \ + "push $"#nr"-256\n\t" \ "jmp common_interrupt"); extern unsigned long prof_cpu_mask; diff --git a/xen/include/asm-x86_64/mpspec.h b/xen/include/asm-x86_64/mpspec.h index 7ca70f41a8..fa5d7aa2df 100644 --- a/xen/include/asm-x86_64/mpspec.h +++ b/xen/include/asm-x86_64/mpspec.h @@ -29,7 +29,7 @@ struct intel_mp_floating { char mpf_signature[4]; /* "_MP_" */ - unsigned long mpf_physptr; /* Configuration table address */ + unsigned int mpf_physptr; /* Configuration table address */ unsigned char mpf_length; /* Our length (paragraphs) */ unsigned char mpf_specification;/* Specification version */ unsigned char mpf_checksum; /* Checksum (makes sum 0) */ @@ -49,11 +49,11 @@ struct mp_config_table char mpc_checksum; char mpc_oem[8]; char mpc_productid[12]; - unsigned long mpc_oemptr; /* 0 if not present */ + unsigned int mpc_oemptr; /* 0 if not present */ unsigned short mpc_oemsize; /* 0 if not present */ unsigned short mpc_oemcount; - unsigned long mpc_lapic; /* APIC address */ - unsigned long reserved; + unsigned int mpc_lapic; /* APIC address */ + unsigned int reserved; }; /* Followed by entries */ @@ -73,12 +73,12 @@ struct mpc_config_processor unsigned char mpc_cpuflag; #define CPU_ENABLED 1 /* Processor is available */ #define CPU_BOOTPROCESSOR 2 /* Processor is the BP */ - unsigned long mpc_cpufeature; + unsigned int mpc_cpufeature; #define CPU_STEPPING_MASK 0x0F #define CPU_MODEL_MASK 0xF0 #define CPU_FAMILY_MASK 0xF00 - unsigned long mpc_featureflag; /* CPUID feature value */ - unsigned long mpc_reserved[2]; + unsigned int mpc_featureflag; /* CPUID feature value */ + unsigned int mpc_reserved[2]; }; struct mpc_config_bus @@ -115,7 +115,7 @@ struct mpc_config_ioapic unsigned char mpc_apicver; unsigned char mpc_flags; #define MPC_APIC_USABLE 0x01 - unsigned long mpc_apicaddr; + 
unsigned int mpc_apicaddr; }; struct mpc_config_intsrc @@ -186,23 +186,18 @@ struct mpc_config_translation * 7 2 CPU MCA+PCI */ -#ifdef CONFIG_MULTIQUAD -#define MAX_IRQ_SOURCES 512 -#else /* !CONFIG_MULTIQUAD */ -#define MAX_IRQ_SOURCES 256 -#endif /* CONFIG_MULTIQUAD */ - -#define MAX_MP_BUSSES 32 +#define MAX_MP_BUSSES 257 +#define MAX_IRQ_SOURCES (MAX_MP_BUSSES*4) enum mp_bustype { MP_BUS_ISA = 1, MP_BUS_EISA, MP_BUS_PCI, MP_BUS_MCA }; -extern int *mp_bus_id_to_type; -extern int *mp_bus_id_to_node; -extern int *mp_bus_id_to_local; -extern int *mp_bus_id_to_pci_bus; +extern int mp_bus_id_to_type [MAX_MP_BUSSES]; +extern int mp_bus_id_to_node [MAX_MP_BUSSES]; +extern int mp_bus_id_to_local [MAX_MP_BUSSES]; +extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES]; extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; extern unsigned int boot_cpu_physical_apicid; diff --git a/xen/include/asm-x86_64/msr.h b/xen/include/asm-x86_64/msr.h index 45ec765e6e..f630034630 100644 --- a/xen/include/asm-x86_64/msr.h +++ b/xen/include/asm-x86_64/msr.h @@ -1,6 +1,7 @@ -#ifndef __ASM_MSR_H -#define __ASM_MSR_H +#ifndef X86_64_MSR_H +#define X86_64_MSR_H 1 +#ifndef __ASSEMBLY__ /* * Access to machine-specific registers (available on 586 and better only) * Note: the rd* operations modify the parameters directly (without using @@ -8,9 +9,17 @@ */ #define rdmsr(msr,val1,val2) \ - __asm__ __volatile__("rdmsr" \ - : "=a" (val1), "=d" (val2) \ - : "c" (msr)) + __asm__ __volatile__("rdmsr" \ + : "=a" (val1), "=d" (val2) \ + : "c" (msr)) + + +#define rdmsrl(msr,val) do { unsigned long a__,b__; \ + __asm__ __volatile__("rdmsr" \ + : "=a" (a__), "=d" (b__) \ + : "c" (msr)); \ + val = a__ | (b__<<32); \ +} while(0); #define wrmsr(msr,val1,val2) \ __asm__ __volatile__("wrmsr" \ @@ -23,8 +32,11 @@ #define rdtscl(low) \ __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx") -#define rdtscll(val) \ - __asm__ __volatile__("rdtsc" : "=A" (val)) +#define rdtscll(val) do { \ + unsigned int a,d; \ + asm 
volatile("rdtsc" : "=a" (a), "=d" (d)); \ + (val) = ((unsigned long)a) | (((unsigned long)d)<<32); \ +} while(0) #define write_tsc(val1,val2) wrmsr(0x10, val1, val2) @@ -33,64 +45,94 @@ : "=a" (low), "=d" (high) \ : "c" (counter)) -/* symbolic names for some interesting MSRs */ -/* Intel defined MSRs. */ -#define MSR_IA32_P5_MC_ADDR 0 -#define MSR_IA32_P5_MC_TYPE 1 -#define MSR_IA32_PLATFORM_ID 0x17 -#define MSR_IA32_EBL_CR_POWERON 0x2a - -#define MSR_IA32_APICBASE 0x1b -#define MSR_IA32_APICBASE_BSP (1<<8) -#define MSR_IA32_APICBASE_ENABLE (1<<11) -#define MSR_IA32_APICBASE_BASE (0xfffff<<12) - -#define MSR_IA32_UCODE_WRITE 0x79 -#define MSR_IA32_UCODE_REV 0x8b - -#define MSR_IA32_BBL_CR_CTL 0x119 - -#define MSR_IA32_MCG_CAP 0x179 -#define MSR_IA32_MCG_STATUS 0x17a -#define MSR_IA32_MCG_CTL 0x17b - -#define MSR_IA32_THERM_CONTROL 0x19a -#define MSR_IA32_THERM_INTERRUPT 0x19b -#define MSR_IA32_THERM_STATUS 0x19c -#define MSR_IA32_MISC_ENABLE 0x1a0 - -#define MSR_IA32_DEBUGCTLMSR 0x1d9 -#define MSR_IA32_LASTBRANCHFROMIP 0x1db -#define MSR_IA32_LASTBRANCHTOIP 0x1dc -#define MSR_IA32_LASTINTFROMIP 0x1dd -#define MSR_IA32_LASTINTTOIP 0x1de - -#define MSR_IA32_MC0_CTL 0x400 -#define MSR_IA32_MC0_STATUS 0x401 -#define MSR_IA32_MC0_ADDR 0x402 -#define MSR_IA32_MC0_MISC 0x403 +#endif + +/* AMD/K8 specific MSRs */ +#define MSR_EFER 0xc0000080 /* extended feature register */ +#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ +#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ +#define MSR_CSTAR 0xc0000083 /* compatibility mode SYSCALL target */ +#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ +#define MSR_FS_BASE 0xc0000100 /* 64bit GS base */ +#define MSR_GS_BASE 0xc0000101 /* 64bit FS base */ +#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow (or USER_GS from kernel) */ +/* EFER bits: */ +#define _EFER_SCE 0 /* SYSCALL/SYSRET */ +#define _EFER_LME 8 /* Long mode enable */ +#define _EFER_LMA 10 /* Long mode active (read-only) */ 
+#define _EFER_NX 11 /* No execute enable */ + +#define EFER_SCE (1<<_EFER_SCE) +#define EFER_LME (1< typedef struct { unsigned long l1_lo; } l1_pgentry_t; typedef struct { unsigned long l2_lo; } l2_pgentry_t; +typedef struct { unsigned long l3_lo; } l3_pgentry_t; +typedef struct { unsigned long l4_lo; } l4_pgentry_t; typedef l1_pgentry_t *l1_pagetable_t; typedef l2_pgentry_t *l2_pagetable_t; +typedef l3_pgentry_t *l3_pagetable_t; +typedef l4_pgentry_t *l4_pagetable_t; typedef struct { unsigned long pt_lo; } pagetable_t; +typedef struct { unsigned long pgprot; } pgprot_t; #endif /* !__ASSEMBLY__ */ /* Strip type from a table entry. */ #define l1_pgentry_val(_x) ((_x).l1_lo) #define l2_pgentry_val(_x) ((_x).l2_lo) +#define l3_pgentry_val(_x) ((_x).l3_lo) +#define l4_pgentry_val(_x) ((_x).l4_lo) #define pagetable_val(_x) ((_x).pt_lo) #define alloc_l1_pagetable() ((l1_pgentry_t *)get_free_page(GFP_KERNEL)) #define alloc_l2_pagetable() ((l2_pgentry_t *)get_free_page(GFP_KERNEL)) +#define alloc_l3_pagetable() ((l3_pgentry_t *)get_free_page(GFP_KERNEL)) +#define alloc_l4_pagetable() ((l4_pgentry_t *)get_free_page(GFP_KERNEL)) /* Add type to a table entry. */ #define mk_l1_pgentry(_x) ( (l1_pgentry_t) { (_x) } ) #define mk_l2_pgentry(_x) ( (l2_pgentry_t) { (_x) } ) +#define mk_l3_pgentry(_x) ( (l3_pgentry_t) { (_x) } ) +#define mk_l4_pgentry(_x) ( (l4_pgentry_t) { (_x) } ) #define mk_pagetable(_x) ( (pagetable_t) { (_x) } ) /* Turn a typed table entry into a page index. */ #define l1_pgentry_to_pagenr(_x) (l1_pgentry_val(_x) >> PAGE_SHIFT) #define l2_pgentry_to_pagenr(_x) (l2_pgentry_val(_x) >> PAGE_SHIFT) +#define l3_pgentry_to_pagenr(_x) (l3_pgentry_val(_x) >> PAGE_SHIFT) +#define l4_pgentry_to_pagenr(_x) (l4_pgentry_val(_x) >> PAGE_SHIFT) /* Turn a typed table entry into a physical address. 
*/ #define l1_pgentry_to_phys(_x) (l1_pgentry_val(_x) & PAGE_MASK) #define l2_pgentry_to_phys(_x) (l2_pgentry_val(_x) & PAGE_MASK) +#define l3_pgentry_to_phys(_x) (l3_pgentry_val(_x) & PAGE_MASK) +#define l4_pgentry_to_phys(_x) (l4_pgentry_val(_x) & PAGE_MASK) /* Dereference a typed level-2 entry to yield a typed level-1 table. */ #define l2_pgentry_to_l1(_x) \ ((l1_pgentry_t *)__va(l2_pgentry_val(_x) & PAGE_MASK)) +/* Dereference a typed level-4 entry to yield a typed level-3 table. */ +#define l4_pgentry_to_l3(_x) \ + ((l3_pgentry_t *)__va(l4_pgentry_val(_x) & PAGE_MASK)) + +/* Dereference a typed level-3 entry to yield a typed level-2 table. */ +#define l3_pgentry_to_l2(_x) \ + ((l2_pgentry_t *)__va(l3_pgentry_val(_x) & PAGE_MASK)) + /* Given a virtual address, get an entry offset into a page table. */ #define l1_table_offset(_a) \ (((_a) >> L1_PAGETABLE_SHIFT) & (ENTRIES_PER_L1_PAGETABLE - 1)) #define l2_table_offset(_a) \ - ((_a) >> L2_PAGETABLE_SHIFT) + (((_a) >> L2_PAGETABLE_SHIFT) & (ENTRIES_PER_L2_PAGETABLE - 1)) +#define l3_table_offset(_a) \ + (((_a) >> L3_PAGETABLE_SHIFT) & (ENTRIES_PER_L3_PAGETABLE - 1)) +#define l4_table_offset(_a) \ + ((_a) >> L4_PAGETABLE_SHIFT) /* Hypervisor table entries use zero to sugnify 'empty'. 
*/ #define l1_pgentry_empty(_x) (!l1_pgentry_val(_x)) #define l2_pgentry_empty(_x) (!l2_pgentry_val(_x)) +#define l3_pgentry_empty(_x) (!l3_pgentry_val(_x)) +#define l4_pgentry_empty(_x) (!l4_pgentry_val(_x)) -#define __PAGE_OFFSET (0xFC400000) -#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) -#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) -#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) -#define page_address(_p) (__va(((_p) - frame_table) << PAGE_SHIFT)) -#define virt_to_page(kaddr) (frame_table + (__pa(kaddr) >> PAGE_SHIFT)) -#define VALID_PAGE(page) ((page - frame_table) < max_mapnr) + +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) } ) + +#define clear_user_page(page, vaddr) clear_page(page) +#define copy_user_page(to, from, vaddr) copy_page(to, from) + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) /* * NB. We don't currently track I/O holes in the physical RAM space. @@ -79,10 +137,15 @@ typedef struct { unsigned long pt_lo; } pagetable_t; #define pfn_is_ram(_pfn) (((_pfn) > 0x100) && ((_pfn) < max_page)) /* High table entries are reserved by the hypervisor. 
*/ -#define DOMAIN_ENTRIES_PER_L2_PAGETABLE \ - (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) -#define HYPERVISOR_ENTRIES_PER_L2_PAGETABLE \ - (ENTRIES_PER_L2_PAGETABLE - DOMAIN_ENTRIES_PER_L2_PAGETABLE) +#define DOMAIN_ENTRIES_PER_L4_PAGETABLE \ + (HYPERVISOR_VIRT_START >> L4_PAGETABLE_SHIFT) +#define HYPERVISOR_ENTRIES_PER_L4_PAGETABLE \ + (ENTRIES_PER_L4_PAGETABLE - DOMAIN_ENTRIES_PER_L4_PAGETABLE) + +#define __START_KERNEL 0xffffffff80100000 +#define __START_KERNEL_map 0xffffffff80000000 +#define __PAGE_OFFSET 0x0000010000000000 +#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) #ifndef __ASSEMBLY__ #include @@ -90,6 +153,10 @@ typedef struct { unsigned long pt_lo; } pagetable_t; #include #include +extern unsigned long vm_stack_flags, vm_stack_flags32; +extern unsigned long vm_data_default_flags, vm_data_default_flags32; +extern unsigned long vm_force_exec32; + #define linear_pg_table ((l1_pgentry_t *)LINEAR_PT_VIRT_START) extern l2_pgentry_t idle_pg_table[ENTRIES_PER_L2_PAGETABLE]; @@ -124,8 +191,21 @@ extern void paging_init(void); #define __flush_tlb_one(__addr) \ __asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr))) -#endif /* !__ASSEMBLY__ */ +#include + +/* + * Tell the user there is some problem. The exception handler decodes this frame. 
+ */ +struct bug_frame { + unsigned char ud2[2]; + char *filename; /* should use 32bit offset instead, but the assembler doesn't like it */ + unsigned short line; +} __attribute__((packed)); +#define HEADER_BUG() asm volatile("ud2 ; .quad %P1 ; .short %P0" :: "i"(__LINE__), \ + "i" (__stringify(__FILE__))) +#define PAGE_BUG(page) BUG() +#endif /* ASSEMBLY */ #define _PAGE_PRESENT 0x001 #define _PAGE_RW 0x002 @@ -151,6 +231,14 @@ __asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr))) #define PAGE_HYPERVISOR_RO MAKE_GLOBAL(__PAGE_HYPERVISOR_RO) #define PAGE_HYPERVISOR_NOCACHE MAKE_GLOBAL(__PAGE_HYPERVISOR_NOCACHE) +#define mk_l4_writeable(_p) \ + (*(_p) = mk_l4_pgentry(l4_pgentry_val(*(_p)) | _PAGE_RW)) +#define mk_l4_readonly(_p) \ + (*(_p) = mk_l4_pgentry(l4_pgentry_val(*(_p)) & ~_PAGE_RW)) +#define mk_l3_writeable(_p) \ + (*(_p) = mk_l3_pgentry(l3_pgentry_val(*(_p)) | _PAGE_RW)) +#define mk_l3_readonly(_p) \ + (*(_p) = mk_l3_pgentry(l3_pgentry_val(*(_p)) & ~_PAGE_RW)) #define mk_l2_writeable(_p) \ (*(_p) = mk_l2_pgentry(l2_pgentry_val(*(_p)) | _PAGE_RW)) #define mk_l2_readonly(_p) \ @@ -160,6 +248,27 @@ __asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr))) #define mk_l1_readonly(_p) \ (*(_p) = mk_l1_pgentry(l1_pgentry_val(*(_p)) & ~_PAGE_RW)) +/* Note: __pa(&symbol_visible_to_c) should be always replaced with __pa_symbol. + Otherwise you risk miscompilation. */ +#define __pa(x) (((unsigned long)(x)>=__START_KERNEL_map)?(unsigned long)(x) - (unsigned long)__START_KERNEL_map:(unsigned long)(x) - PAGE_OFFSET) +/* __pa_symbol should use for C visible symbols, but only for them. + This seems to be the official gcc blessed way to do such arithmetic. 
*/ +#define __pa_symbol(x) \ + ({unsigned long v; \ + asm("" : "=r" (v) : "0" (x)); \ + v - __START_KERNEL_map; }) +#define __pa_maybe_symbol(x) \ + ({unsigned long v; \ + asm("" : "=r" (v) : "0" (x)); \ + __pa(v); }) +#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) +#ifndef CONFIG_DISCONTIGMEM +#define virt_to_page(kaddr) (frame_table + (__pa(kaddr) >> PAGE_SHIFT)) +#define pfn_to_page(pfn) (frame_table + (pfn)) +#define page_to_pfn(page) ((page) - frame_table) +#define page_address(_p) (__va(((_p) - frame_table) << PAGE_SHIFT)) +#define VALID_PAGE(page) (((page) - frame_table) < max_mapnr) +#endif #ifndef __ASSEMBLY__ static __inline__ int get_order(unsigned long size) @@ -176,4 +285,16 @@ static __inline__ int get_order(unsigned long size) } #endif -#endif /* _I386_PAGE_H */ +#define phys_to_pfn(phys) ((phys) >> PAGE_SHIFT) + +#define __VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define __VM_STACK_FLAGS (VM_GROWSDOWN | VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#define VM_DATA_DEFAULT_FLAGS \ + ((current->thread.flags & THREAD_IA32) ? 
vm_data_default_flags32 : \ + vm_data_default_flags) +#define VM_STACK_FLAGS vm_stack_flags + +#endif /* _X86_64_PAGE_H */ diff --git a/xen/include/asm-x86_64/param.h b/xen/include/asm-x86_64/param.h index 1b10bf49fe..601733b463 100644 --- a/xen/include/asm-x86_64/param.h +++ b/xen/include/asm-x86_64/param.h @@ -1,5 +1,5 @@ -#ifndef _ASMi386_PARAM_H -#define _ASMi386_PARAM_H +#ifndef _ASMx86_64_PARAM_H +#define _ASMx86_64_PARAM_H #ifndef HZ #define HZ 100 diff --git a/xen/include/asm-x86_64/pci.h b/xen/include/asm-x86_64/pci.h index a38bef4986..df9889d0fe 100644 --- a/xen/include/asm-x86_64/pci.h +++ b/xen/include/asm-x86_64/pci.h @@ -1,9 +1,9 @@ -#ifndef __i386_PCI_H -#define __i386_PCI_H +#ifndef __x8664_PCI_H +#define __x8664_PCI_H #include +#include -#ifdef __KERNEL__ /* Can be used to override the logic in pci_scan_bus for skipping already-configured bus numbers - to be used for buggy BIOSes @@ -24,23 +24,16 @@ void pcibios_penalize_isa_irq(int irq); struct irq_routing_table *pcibios_get_irq_routing_table(void); int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); -/* Dynamic DMA mapping stuff. - * i386 has everything mapped statically. - */ - #include #include #include /*#include */ #include +#include +#include struct pci_dev; - -/* The PCI address space does equal the physical memory - * address space. The networking and block device layers use - * this boolean for bounce buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (1) +extern int force_mmu; /* Allocate and map kernel buffer using consistent mode DMA for a device. * hwdev should be valid struct pci_dev pointer for PCI devices, @@ -63,28 +56,82 @@ extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle); +#ifdef CONFIG_GART_IOMMU + /* Map a single buffer of the indicated size for DMA in streaming mode. * The 32-bit bus address to use is returned. 
* * Once the device is given the dma address, the device owns this memory * until either pci_unmap_single or pci_dma_sync_single is performed. */ +extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, + size_t size, int direction); + + +void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t addr, + size_t size, int direction); + +/* + * pci_{map,unmap}_single_page maps a kernel page to a dma_addr_t. identical + * to pci_map_single, but takes a struct pfn_info instead of a virtual address + */ + +#define pci_map_page(dev,page,offset,size,dir) \ + pci_map_single((dev), page_address(page)+(offset), (size), (dir)) + +#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ + dma_addr_t ADDR_NAME; +#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ + __u32 LEN_NAME; +#define pci_unmap_addr(PTR, ADDR_NAME) \ + ((PTR)->ADDR_NAME) +#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ + (((PTR)->ADDR_NAME) = (VAL)) +#define pci_unmap_len(PTR, LEN_NAME) \ + ((PTR)->LEN_NAME) +#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ + (((PTR)->LEN_NAME) = (VAL)) + +static inline void pci_dma_sync_single(struct pci_dev *hwdev, + dma_addr_t dma_handle, + size_t size, int direction) +{ + BUG_ON(direction == PCI_DMA_NONE); +} + +static inline void pci_dma_sync_sg(struct pci_dev *hwdev, + struct scatterlist *sg, + int nelems, int direction) +{ + BUG_ON(direction == PCI_DMA_NONE); +} + +/* The PCI address space does equal the physical memory + * address space. The networking and block device layers use + * this boolean for bounce buffer decisions. + */ +#define PCI_DMA_BUS_IS_PHYS (0) + + +#else static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction) { + dma_addr_t addr; + if (direction == PCI_DMA_NONE) - out_of_line_bug(); - flush_write_buffers(); - return virt_to_bus(ptr); + out_of_line_bug(); + addr = virt_to_bus(ptr); + + /* + * This is gross, but what should I do. + * Unfortunately drivers do not test the return value of this. 
+ */ + if ((addr+size) & ~hwdev->dma_mask) + out_of_line_bug(); + return addr; } -/* Unmap a single streaming mode DMA translation. The dma_addr and size - * must match what was provided for in a previous pci_map_single call. All - * other usages are undefined. - * - * After this call, reads by the cpu to the buffer are guarenteed to see - * whatever the device wrote there. - */ static inline void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction) { @@ -93,25 +140,16 @@ static inline void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, /* Nothing to do */ } -/* - * pci_{map,unmap}_single_page maps a kernel page to a dma_addr_t. identical - * to pci_map_single, but takes a struct pfn_info instead of a virtual address - */ static inline dma_addr_t pci_map_page(struct pci_dev *hwdev, struct pfn_info *page, unsigned long offset, size_t size, int direction) { + dma_addr_t addr; if (direction == PCI_DMA_NONE) + out_of_line_bug(); + addr = (page - frame_table) * PAGE_SIZE + offset; + if ((addr+size) & ~hwdev->dma_mask) out_of_line_bug(); - - return (dma_addr_t)(page - frame_table) * PAGE_SIZE + offset; -} - -static inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address, - size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - /* Nothing to do */ + return addr; } /* pci_unmap_{page,single} is a nop so... */ @@ -122,6 +160,8 @@ static inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address, #define pci_unmap_len(PTR, LEN_NAME) (0) #define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) +#define BAD_DMA_ADDRESS (-1UL) + /* Map a set of buffers described by scatterlist in streaming * mode for DMA. This is the scather-gather version of the * above pci_map_single interface. 
Here the scatter gather list @@ -141,45 +181,47 @@ static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) { int i; - - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - - /* - * temporary 2.4 hack - */ - for (i = 0; i < nents; i++ ) { - if (sg[i].address && sg[i].page) - out_of_line_bug(); + + BUG_ON(direction == PCI_DMA_NONE); + + /* + * temporary 2.4 hack + */ + for (i = 0; i < nents; i++ ) { + struct scatterlist *s = &sg[i]; + void *addr = s->address; + if (addr) + BUG_ON(s->page || s->offset); + else if (s->page) + addr = page_address(s->page) + s->offset; #if 0 /* Invalid check, since address==0 is valid. */ - else if (!sg[i].address && !sg[i].page) - out_of_line_bug(); + else + BUG(); #endif - - /* XXX Switched round, since address==0 is valid. */ - if (sg[i].page) - sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset; - else - sg[i].dma_address = virt_to_bus(sg[i].address); - } - - flush_write_buffers(); + s->dma_address = pci_map_single(hwdev, addr, s->length, direction); + if (unlikely(s->dma_address == BAD_DMA_ADDRESS)) + goto error; + } return nents; + + error: + pci_unmap_sg(hwdev, sg, i, direction); + return 0; } - + /* Unmap a set of streaming mode DMA translations. * Again, cpu read rules concerning calls here are the same as for * pci_unmap_single() above. */ -static inline void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, - int nents, int direction) +static inline void pci_unmap_sg(struct pci_dev *dev, struct scatterlist *sg, + int nents, int dir) { if (direction == PCI_DMA_NONE) out_of_line_bug(); - /* Nothing to do */ } + /* Make physical memory consistent for a single * streaming mode DMA translation after a transfer. 
* @@ -213,6 +255,17 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev, flush_write_buffers(); } +#define PCI_DMA_BUS_IS_PHYS 1 + +#endif + +extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, + int nents, int direction); +extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, + int nents, int direction); + +#define pci_unmap_page pci_unmap_single + /* Return whether the given PCI device DMA address mask can * be supported properly. For example, if your device can * only drive the low 24-bits during PCI bus mastering, then @@ -244,8 +297,6 @@ pci_dac_page_to_dma(struct pci_dev *pdev, struct pfn_info *page, unsigned long o static __inline__ struct pfn_info * pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr) { - unsigned long poff = (dma_addr >> PAGE_SHIFT); - return frame_table + poff; } @@ -281,6 +332,5 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); #endif -#endif /* __KERNEL__ */ -#endif /* __i386_PCI_H */ +#endif /* __x8664_PCI_H */ diff --git a/xen/include/asm-x86_64/pgalloc.h b/xen/include/asm-x86_64/pgalloc.h index 6f01b44441..de866465ef 100644 --- a/xen/include/asm-x86_64/pgalloc.h +++ b/xen/include/asm-x86_64/pgalloc.h @@ -1,38 +1,12 @@ -#ifndef _I386_PGALLOC_H -#define _I386_PGALLOC_H +#ifndef _X86_64_PGALLOC_H +#define _X86_64_PGALLOC_H #include #include #include #include -#define pgd_quicklist (current_cpu_data.pgd_quick) -#define pmd_quicklist (current_cpu_data.pmd_quick) -#define pte_quicklist (current_cpu_data.pte_quick) -#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz) - - -/* - * Allocate and free page tables. - */ - - -#define pte_free(pte) pte_free_fast(pte) -#define pgd_alloc(mm) get_pgd_fast() -#define pgd_free(pgd) free_pgd_fast(pgd) - -/* - * allocating and freeing a pmd is trivial: the 1-entry pmd is - * inside the pgd, so has no extra memory associated with it. 
- * (In the PAE case we free the pmds as part of the pgd.) - */ - -#define pmd_alloc_one_fast(mm, addr) ({ BUG(); ((pmd_t *)1); }) -#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) -#define pmd_free_slow(x) do { } while (0) -#define pmd_free_fast(x) do { } while (0) -#define pmd_free(x) do { } while (0) -#define pgd_populate(mm, pmd, pte) BUG() +/* XXX probably should be moved to flushtlb.h */ /* * TLB flushing: @@ -40,9 +14,6 @@ * - flush_tlb() flushes the current mm struct TLBs * - flush_tlb_all() flushes all processes TLBs * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables - * - * ..but the i386 has somewhat limited tlb flushing capabilities, - * and page-granular flushes are available only on i486 and up. */ #ifndef CONFIG_SMP @@ -56,7 +27,6 @@ #define try_flush_tlb_mask(_mask) __flush_tlb() #else - #include extern int try_flush_tlb_mask(unsigned long mask); @@ -70,10 +40,4 @@ extern void flush_tlb_all_pge(void); #endif -static inline void flush_tlb_pgtables(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - /* i386 does not keep any page table caches in TLB */ -} - -#endif /* _I386_PGALLOC_H */ +#endif /* _X86_64_PGALLOC_H */ diff --git a/xen/include/asm-x86_64/processor.h b/xen/include/asm-x86_64/processor.h index c7df85aa28..47d0f751e9 100644 --- a/xen/include/asm-x86_64/processor.h +++ b/xen/include/asm-x86_64/processor.h @@ -1,11 +1,11 @@ /* - * include/asm-i386/processor.h + * include/asm-x86_64/processor.h * * Copyright (C) 1994 Linus Torvalds */ -#ifndef __ASM_I386_PROCESSOR_H -#define __ASM_I386_PROCESSOR_H +#ifndef __ASM_X86_64_PROCESSOR_H +#define __ASM_X86_64_PROCESSOR_H #include #include @@ -16,12 +16,21 @@ struct task_struct; +#define TF_MASK 0x00000100 +#define IF_MASK 0x00000200 +#define IOPL_MASK 0x00003000 +#define NT_MASK 0x00004000 +#define VM_MASK 0x00020000 +#define AC_MASK 0x00040000 +#define VIF_MASK 0x00080000 /* virtual interrupt flag */ +#define VIP_MASK 0x00100000 /* virtual interrupt 
pending */ +#define ID_MASK 0x00200000 + /* * Default implementation of macro that returns current * instruction pointer ("program counter"). */ -#define current_text_addr() \ - ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; }) +#define current_text_addr() ({ void *pc; asm volatile("leaq 1f(%%rip),%0\n1:":"=r"(pc)); pc; }) /* * CPU type and hardware bug flags. Kept separately for each CPU. @@ -30,18 +39,22 @@ struct task_struct; */ struct cpuinfo_x86 { - __u8 x86; /* CPU family */ - __u8 x86_vendor; /* CPU vendor */ - __u8 x86_model; - __u8 x86_mask; - int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ - __u32 x86_capability[NCAPINTS]; - char x86_vendor_id[16]; - unsigned long *pgd_quick; - unsigned long *pmd_quick; - unsigned long *pte_quick; - unsigned long pgtable_cache_sz; -} __attribute__((__aligned__(SMP_CACHE_BYTES))); + __u8 x86; /* CPU family */ + __u8 x86_vendor; /* CPU vendor */ + __u8 x86_model; + __u8 x86_mask; + int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ + __u32 x86_capability[NCAPINTS]; + char x86_vendor_id[16]; + char x86_model_id[64]; + int x86_cache_size; /* in KB - valid for CPUS which support this + call */ + int x86_clflush_size; + int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined(in pages)*/ + __u8 x86_virt_bits, x86_phys_bits; + __u32 x86_power; + unsigned long loops_per_jiffy; +} ____cacheline_aligned; #define X86_VENDOR_INTEL 0 #define X86_VENDOR_CYRIX 1 @@ -68,16 +81,17 @@ extern struct cpuinfo_x86 cpu_data[]; #define current_cpu_data boot_cpu_data #endif -#define cpu_has_pge (test_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability)) -#define cpu_has_pse (test_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability)) -#define cpu_has_pae (test_bit(X86_FEATURE_PAE, boot_cpu_data.x86_capability)) -#define cpu_has_tsc (test_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability)) -#define cpu_has_de (test_bit(X86_FEATURE_DE, boot_cpu_data.x86_capability)) -#define cpu_has_vme 
(test_bit(X86_FEATURE_VME, boot_cpu_data.x86_capability)) -#define cpu_has_fxsr (test_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability)) -#define cpu_has_xmm (test_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability)) -#define cpu_has_fpu (test_bit(X86_FEATURE_FPU, boot_cpu_data.x86_capability)) -#define cpu_has_apic (test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) +#define cpu_has_pge 1 +#define cpu_has_pse 1 +#define cpu_has_pae 1 +#define cpu_has_tsc 1 +#define cpu_has_de 1 +#define cpu_has_vme 1 +#define cpu_has_fxsr 1 +#define cpu_has_xmm 1 +#define cpu_has_apic (test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) + +extern char ignore_irq13; extern void identify_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); @@ -105,60 +119,61 @@ extern void dodgy_tsc(void); #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ /* - * Generic CPUID function + * Generic CPUID function + * FIXME: This really belongs to msr.h */ -static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) +extern inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) { - __asm__("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (op)); + __asm__("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (op)); } /* * CPUID functions returning a single datum */ -static inline unsigned int cpuid_eax(unsigned int op) +extern inline unsigned int cpuid_eax(unsigned int op) { - unsigned int eax; + unsigned int eax; - __asm__("cpuid" - : "=a" (eax) - : "0" (op) - : "bx", "cx", "dx"); - return eax; + __asm__("cpuid" + : "=a" (eax) + : "0" (op) + : "bx", "cx", "dx"); + return eax; } -static inline unsigned int cpuid_ebx(unsigned int op) +extern inline unsigned int cpuid_ebx(unsigned int op) { - unsigned int eax, ebx; + unsigned int eax, ebx; - __asm__("cpuid" - : "=a" (eax), "=b" (ebx) - : "0" (op) - : "cx", "dx" ); - return ebx; + __asm__("cpuid" + : "=a" (eax), "=b" (ebx) + : "0" (op) 
+ : "cx", "dx" ); + return ebx; } -static inline unsigned int cpuid_ecx(unsigned int op) +extern inline unsigned int cpuid_ecx(unsigned int op) { - unsigned int eax, ecx; + unsigned int eax, ecx; - __asm__("cpuid" - : "=a" (eax), "=c" (ecx) - : "0" (op) - : "bx", "dx" ); - return ecx; + __asm__("cpuid" + : "=a" (eax), "=c" (ecx) + : "0" (op) + : "bx", "dx" ); + return ecx; } -static inline unsigned int cpuid_edx(unsigned int op) +extern inline unsigned int cpuid_edx(unsigned int op) { - unsigned int eax, edx; + unsigned int eax, edx; - __asm__("cpuid" - : "=a" (eax), "=d" (edx) - : "0" (op) - : "bx", "cx"); - return edx; + __asm__("cpuid" + : "=a" (eax), "=d" (edx) + : "0" (op) + : "bx", "cx"); + return edx; } @@ -177,15 +192,15 @@ static inline unsigned int cpuid_edx(unsigned int op) #define X86_CR0_PG 0x80000000 /* Paging (RW) */ #define read_cr0() ({ \ - unsigned int __dummy; \ + unsigned long __dummy; \ __asm__( \ - "movl %%cr0,%0\n\t" \ + "movq %%cr0,%0\n\t" \ :"=r" (__dummy)); \ __dummy; \ }) #define write_cr0(x) \ - __asm__("movl %0,%%cr0": :"r" (x)); + __asm__("movq %0,%%cr0": :"r" (x)); @@ -214,22 +229,22 @@ extern unsigned long mmu_cr4_features; static inline void set_in_cr4 (unsigned long mask) { - mmu_cr4_features |= mask; - __asm__("movl %%cr4,%%eax\n\t" - "orl %0,%%eax\n\t" - "movl %%eax,%%cr4\n" - : : "irg" (mask) - :"ax"); + mmu_cr4_features |= mask; + __asm__("movq %%cr4,%%rax\n\t" + "orq %0,%%rax\n\t" + "movq %%rax,%%cr4\n" + : : "irg" (mask) + :"ax"); } static inline void clear_in_cr4 (unsigned long mask) { - mmu_cr4_features &= ~mask; - __asm__("movl %%cr4,%%eax\n\t" - "andl %0,%%eax\n\t" - "movl %%eax,%%cr4\n" - : : "irg" (~mask) - :"ax"); + mmu_cr4_features &= ~mask; + __asm__("movq %%cr4,%%rax\n\t" + "andq %0,%%rax\n\t" + "movq %%rax,%%cr4\n" + : : "irg" (~mask) + :"ax"); } /* @@ -259,25 +274,26 @@ static inline void clear_in_cr4 (unsigned long mask) outb((data), 0x23); \ } while (0) -#define EISA_bus (0) -#define MCA_bus (0) +/* + * Bus 
types + */ +#define EISA_bus 0 +#define MCA_bus 0 +#define MCA_bus__is_a_macro -/* from system description table in BIOS. Mostly for MCA use, but -others may find it useful. */ -extern unsigned int machine_id; -extern unsigned int machine_submodel_id; -extern unsigned int BIOS_revision; -extern unsigned int mca_pentium_flag; /* - * User space process size: 3GB (default). + * User space process size: 512GB - 1GB (default). */ -#define TASK_SIZE (PAGE_OFFSET) +#define TASK_SIZE (0x0000007fc0000000) /* This decides where the kernel will search for a free chunk of vm * space during mmap's. */ -#define TASK_UNMAPPED_BASE (TASK_SIZE / 3) +#define TASK_UNMAPPED_32 0xa0000000 +#define TASK_UNMAPPED_64 (TASK_SIZE/3) +#define TASK_UNMAPPED_BASE \ + ((current->thread.flags & THREAD_IA32) ? TASK_UNMAPPED_32 : TASK_UNMAPPED_64) /* * Size of io_bitmap in longwords: 32 is ports 0-0x3ff. @@ -286,125 +302,84 @@ extern unsigned int mca_pentium_flag; #define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) #define INVALID_IO_BITMAP_OFFSET 0x8000 -struct i387_fsave_struct { - long cwd; - long swd; - long twd; - long fip; - long fcs; - long foo; - long fos; - long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ -}; - struct i387_fxsave_struct { - unsigned short cwd; - unsigned short swd; - unsigned short twd; - unsigned short fop; - long fip; - long fcs; - long foo; - long fos; - long mxcsr; - long reserved; - long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ - long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ - long padding[56]; + u16 cwd; + u16 swd; + u16 twd; + u16 fop; + u64 rip; + u64 rdp; + u32 mxcsr; + u32 mxcsr_mask; + u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 128 bytes */ + u32 padding[24]; } __attribute__ ((aligned (16))); union i387_union { - struct i387_fsave_struct fsave; - struct i387_fxsave_struct fxsave; + struct i387_fxsave_struct fxsave; }; typedef 
struct { - unsigned long seg; + unsigned long seg; } mm_segment_t; struct tss_struct { - unsigned short back_link,__blh; - unsigned long esp0; - unsigned short ss0,__ss0h; - unsigned long esp1; - unsigned short ss1,__ss1h; - unsigned long esp2; - unsigned short ss2,__ss2h; - unsigned long __cr3; - unsigned long eip; - unsigned long eflags; - unsigned long eax,ecx,edx,ebx; - unsigned long esp; - unsigned long ebp; - unsigned long esi; - unsigned long edi; - unsigned short es, __esh; - unsigned short cs, __csh; - unsigned short ss, __ssh; - unsigned short ds, __dsh; - unsigned short fs, __fsh; - unsigned short gs, __gsh; - unsigned short ldt, __ldth; - unsigned short trace, bitmap; - unsigned long io_bitmap[IO_BITMAP_SIZE+1]; - /* - * pads the TSS to be cacheline-aligned (size is 0x100) - */ - unsigned long __cacheline_filler[5]; -}; + unsigned short back_link,__blh; +/* u32 reserved1; */ + u64 rsp0; + u64 rsp1; + u64 rsp2; + u64 reserved2; + u64 ist[7]; + u32 reserved3; + u32 reserved4; + u16 reserved5; + u16 io_map_base; + u32 io_bitmap[IO_BITMAP_SIZE]; +} __attribute__((packed)) ____cacheline_aligned; struct thread_struct { - unsigned long esp1, ss1; + unsigned long rsp0; + unsigned long rip; + unsigned long rsp; + unsigned long userrsp; /* Copy from PDA */ + unsigned long fs; + unsigned long gs; + unsigned short es, ds, fsindex, gsindex; + enum { + THREAD_IA32 = 0x0001, + } flags; /* Hardware debugging registers */ - unsigned long debugreg[8]; /* %%db0-7 debug registers */ + unsigned long debugreg[8]; /* %%db0-7 debug registers */ /* floating point info */ - union i387_union i387; + union i387_union i387; /* Trap info. 
*/ - int fast_trap_idx; - struct desc_struct fast_trap_desc; - trap_info_t traps[256]; + trap_info_t traps[256]; }; #define IDT_ENTRIES 256 -extern struct desc_struct idt_table[]; -extern struct desc_struct *idt_tables[]; - -#define SET_DEFAULT_FAST_TRAP(_p) \ - (_p)->fast_trap_idx = 0x20; \ - (_p)->fast_trap_desc.a = 0; \ - (_p)->fast_trap_desc.b = 0; - -#define CLEAR_FAST_TRAP(_p) \ - (memset(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \ - 0, 8)) - -#define SET_FAST_TRAP(_p) \ - (memcpy(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \ - &((_p)->fast_trap_desc), 8)) - -long set_fast_trap(struct task_struct *p, int idx); +extern struct gate_struct idt_table[]; +extern struct gate_struct *idt_tables[]; #define INIT_THREAD { \ 0, 0, \ + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 0, /* flags */ \ { [0 ... 7] = 0 }, /* debugging registers */ \ { { 0, }, }, /* 387 state */ \ - 0x20, { 0, 0 }, /* DEFAULT_FAST_TRAP */ \ { {0} } /* io permissions */ \ } #define INIT_TSS { \ 0,0, /* back_link, __blh */ \ - 0, /* esp0 */ \ - 0, 0, /* ss0 */ \ - 0,0,0,0,0,0, /* stack1, stack2 */ \ - 0, /* cr3 */ \ - 0,0, /* eip,eflags */ \ - 0,0,0,0, /* eax,ecx,edx,ebx */ \ - 0,0,0,0, /* esp,ebp,esi,edi */ \ - 0,0,0,0,0,0, /* es,cs,ss */ \ - 0,0,0,0,0,0, /* ds,fs,gs */ \ - 0,0, /* ldt */ \ - 0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \ + 0, /* rsp0 */ \ + 0, 0, /* rsp1, rsp2 */ \ + 0, /* reserved */ \ + { [0 ... 6] = 0 }, /* ist[] */ \ + 0,0, /* reserved */ \ + 0, INVALID_IO_BITMAP_OFFSET, /* trace, bitmap */ \ {~0, } /* ioperm */ \ } @@ -418,7 +393,7 @@ struct mm_struct { /* Current LDT details. */ unsigned long ldt_base, ldt_ents, shadow_ldt_mapcnt; /* Next entry is passed to LGDT on domain switch. */ - char gdt[6]; + char gdt[10]; }; #define IDLE0_MM \ @@ -429,9 +404,9 @@ struct mm_struct { /* Convenient accessor for mm.gdt. 
*/ #define SET_GDT_ENTRIES(_p, _e) ((*(u16 *)((_p)->mm.gdt + 0)) = (_e)) -#define SET_GDT_ADDRESS(_p, _a) ((*(u32 *)((_p)->mm.gdt + 2)) = (_a)) +#define SET_GDT_ADDRESS(_p, _a) ((*(u64 *)((_p)->mm.gdt + 2)) = (_a)) #define GET_GDT_ENTRIES(_p) ((*(u16 *)((_p)->mm.gdt + 0))) -#define GET_GDT_ADDRESS(_p) ((*(u32 *)((_p)->mm.gdt + 2))) +#define GET_GDT_ADDRESS(_p) ((*(u64 *)((_p)->mm.gdt + 2))) long set_gdt(struct task_struct *p, unsigned long *frames, @@ -462,32 +437,26 @@ static inline void rep_nop(void) #define cpu_relax() rep_nop() -/* Prefetch instructions for Pentium III and AMD Athlon */ -#ifdef CONFIG_MPENTIUMIII +#define init_task (init_task_union.task) +#define init_stack (init_task_union.stack) -#define ARCH_HAS_PREFETCH -extern inline void prefetch(const void *x) -{ - __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x)); -} +/* Avoid speculative execution by the CPU */ +extern inline void sync_core(void) +{ + int tmp; + asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory"); +} -#elif CONFIG_X86_USE_3DNOW +#define cpu_has_fpu 1 #define ARCH_HAS_PREFETCH #define ARCH_HAS_PREFETCHW #define ARCH_HAS_SPINLOCK_PREFETCH -extern inline void prefetch(const void *x) -{ - __asm__ __volatile__ ("prefetch (%0)" : : "r"(x)); -} +#define prefetch(x) __builtin_prefetch((x),0) +#define prefetchw(x) __builtin_prefetch((x),1) +#define spin_lock_prefetch(x) prefetchw(x) +#define cpu_relax() rep_nop() -extern inline void prefetchw(const void *x) -{ - __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x)); -} -#define spin_lock_prefetch(x) prefetchw(x) - -#endif -#endif /* __ASM_I386_PROCESSOR_H */ +#endif /* __ASM_X86_64_PROCESSOR_H */ diff --git a/xen/include/asm-x86_64/ptrace.h b/xen/include/asm-x86_64/ptrace.h index 26269afcb0..da0419f429 100644 --- a/xen/include/asm-x86_64/ptrace.h +++ b/xen/include/asm-x86_64/ptrace.h @@ -1,27 +1,92 @@ -#ifndef _I386_PTRACE_H -#define _I386_PTRACE_H +#ifndef _X86_64_PTRACE_H +#define _X86_64_PTRACE_H + +#if 
defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) +#define R15 0 +#define R14 8 +#define R13 16 +#define R12 24 +#define RBP 36 +#define RBX 40 +/* arguments: interrupts/non tracing syscalls only save upto here*/ +#define R11 48 +#define R10 56 +#define R9 64 +#define R8 72 +#define RAX 80 +#define RCX 88 +#define RDX 96 +#define RSI 104 +#define RDI 112 +#define ORIG_RAX 120 /* = ERROR */ +/* end of arguments */ +/* cpu exception frame or undefined in case of fast syscall. */ +#define RIP 128 +#define CS 136 +#define EFLAGS 144 +#define RSP 152 +#define SS 160 +#define ARGOFFSET R11 +#endif /* __ASSEMBLY__ */ + +/* top of stack page */ +#define FRAME_SIZE 168 + +#define PTRACE_SETOPTIONS 21 + +/* options set using PTRACE_SETOPTIONS */ +#define PTRACE_O_TRACESYSGOOD 0x00000001 + +/* Dummy values for ptrace */ +#define FS 1000 +#define GS 1008 + +#ifndef __ASSEMBLY__ struct pt_regs { - long ebx; - long ecx; - long edx; - long esi; - long edi; - long ebp; - long eax; - int xds; - int xes; - int xfs; - int xgs; - long orig_eax; - long eip; - int xcs; - long eflags; - long esp; - int xss; + unsigned long r15; + unsigned long r14; + unsigned long r13; + unsigned long r12; + unsigned long rbp; + unsigned long rbx; +/* arguments: non interrupts/non tracing syscalls only save upto here*/ + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long rax; + unsigned long rcx; + unsigned long rdx; + unsigned long rsi; + unsigned long rdi; + unsigned long orig_rax; +/* end of arguments */ +/* cpu exception frame or undefined */ + unsigned long rip; + unsigned long cs; + unsigned long eflags; + unsigned long rsp; + unsigned long ss; +/* top of stack page */ }; -enum EFLAGS { +#endif + +/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. 
*/ +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 +#define PTRACE_GETFPREGS 14 +#define PTRACE_SETFPREGS 15 +#define PTRACE_GETFPXREGS 18 +#define PTRACE_SETFPXREGS 19 + +#if defined(__KERNEL__) && !defined(__ASSEMBLY__) +#define user_mode(regs) (!!((regs)->cs & 3)) +#define instruction_pointer(regs) ((regs)->rip) +extern void show_regs(struct pt_regs *); + +enum { EF_CF = 0x00000001, EF_PF = 0x00000004, EF_AF = 0x00000010, @@ -44,8 +109,6 @@ enum EFLAGS { EF_ID = 0x00200000, /* id */ }; -#ifdef __KERNEL__ -#define user_mode(regs) ((3 & (regs)->xcs)) #endif #endif diff --git a/xen/include/asm-x86_64/rwlock.h b/xen/include/asm-x86_64/rwlock.h index 9475419f95..8920e5829f 100644 --- a/xen/include/asm-x86_64/rwlock.h +++ b/xen/include/asm-x86_64/rwlock.h @@ -1,4 +1,4 @@ -/* include/asm-i386/rwlock.h +/* include/asm-x86_64/rwlock.h * * Helpers used by both rw spinlocks and rw semaphores. * @@ -6,6 +6,7 @@ * spinlock.h Copyright 1996 Linus Torvalds. * * Copyright 1999 Red Hat, Inc. + * Copyright 2001,2002 SuSE labs * * Written by Benjamin LaHaise. * @@ -14,8 +15,8 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. 
*/ -#ifndef _ASM_I386_RWLOCK_H -#define _ASM_I386_RWLOCK_H +#ifndef _ASM_X86_64_RWLOCK_H +#define _ASM_X86_64_RWLOCK_H #define RW_LOCK_BIAS 0x01000000 #define RW_LOCK_BIAS_STR "0x01000000" @@ -35,10 +36,10 @@ "js 2f\n" \ "1:\n" \ ".section .text.lock,\"ax\"\n" \ - "2:\tpushl %%eax\n\t" \ - "leal %0,%%eax\n\t" \ + "2:\tpushq %%rax\n\t" \ + "leaq %0,%%rax\n\t" \ "call " helper "\n\t" \ - "popl %%eax\n\t" \ + "popq %%rax\n\t" \ "jmp 1b\n" \ ".previous" \ :"=m" (*(volatile int *)rw) : : "memory") @@ -65,10 +66,10 @@ "jnz 2f\n" \ "1:\n" \ ".section .text.lock,\"ax\"\n" \ - "2:\tpushl %%eax\n\t" \ - "leal %0,%%eax\n\t" \ + "2:\tpushq %%rax\n\t" \ + "leaq %0,%%rax\n\t" \ "call " helper "\n\t" \ - "popl %%eax\n\t" \ + "popq %%rax\n\t" \ "jmp 1b\n" \ ".previous" \ :"=m" (*(volatile int *)rw) : : "memory") diff --git a/xen/include/asm-x86_64/scatterlist.h b/xen/include/asm-x86_64/scatterlist.h index 9d858415db..1597d48eb0 100644 --- a/xen/include/asm-x86_64/scatterlist.h +++ b/xen/include/asm-x86_64/scatterlist.h @@ -1,5 +1,5 @@ -#ifndef _I386_SCATTERLIST_H -#define _I386_SCATTERLIST_H +#ifndef _X8664_SCATTERLIST_H +#define _X8664_SCATTERLIST_H struct scatterlist { char * address; /* Location data is to be transferred to, NULL for diff --git a/xen/include/asm-x86_64/smp.h b/xen/include/asm-x86_64/smp.h index 58a0a24d30..4c795083d1 100644 --- a/xen/include/asm-x86_64/smp.h +++ b/xen/include/asm-x86_64/smp.h @@ -5,12 +5,9 @@ #include #ifdef CONFIG_SMP -#define TARGET_CPUS cpu_online_map -#else -#define TARGET_CPUS 0x01 -#endif +#ifndef ASSEMBLY +#include -#ifdef CONFIG_SMP /* * Private routines/data */ @@ -62,7 +59,7 @@ extern void smp_store_cpu_info(int id); /* Store per CPU info (like the initial * so this is correct in the x86 case. 
*/ -#define smp_processor_id() (current->processor) +#define smp_processor_id() read_pda(cpunumber) #include #include @@ -70,14 +67,37 @@ extern void smp_store_cpu_info(int id); /* Store per CPU info (like the initial static __inline int hard_smp_processor_id(void) { /* we don't want to mark this access volatile - bad code generation */ - return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID)); + return GET_APIC_ID(*(unsigned *)(APIC_BASE+APIC_ID)); } -static __inline int logical_smp_processor_id(void) -{ - /* we don't want to mark this access volatile - bad code generation */ - return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); -} +extern int apic_disabled; +extern int slow_smp_processor_id(void); +#define safe_smp_processor_id() \ + (!apic_disabled ? hard_smp_processor_id() : slow_smp_processor_id()) + +#endif /* !ASSEMBLY */ + +#define NO_PROC_ID 0xFF /* No processor magic marker */ + +/* + * This magic constant controls our willingness to transfer + * a process across CPUs. Such a transfer incurs misses on the L1 + * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My + * gut feeling is this will vary by board in value. For a board + * with separate L2 cache it probably depends also on the RSS, and + * for a board with shared L2 cache it ought to decay fast as other + * processes are run. + */ + +#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */ + + + +#endif +#define INT_DELIVERY_MODE 1 /* logical delivery */ +#define TARGET_CPUS 1 +#ifndef CONFIG_SMP +#define safe_smp_processor_id() 0 #endif #endif diff --git a/xen/include/asm-x86_64/spinlock.h b/xen/include/asm-x86_64/spinlock.h index 9a4fc8573d..fe89aefd7b 100644 --- a/xen/include/asm-x86_64/spinlock.h +++ b/xen/include/asm-x86_64/spinlock.h @@ -59,52 +59,9 @@ typedef struct { /* * This works. Despite all the confusion. 
- * (except on PPro SMP or if we are using OOSTORE) - * (PPro errata 66, 92) */ - -#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE) - -#define spin_unlock_string \ - "movb $1,%0" \ - :"=m" (lock->lock) : : "memory" - - -static inline void spin_unlock(spinlock_t *lock) -{ -#if SPINLOCK_DEBUG - if (lock->magic != SPINLOCK_MAGIC) - BUG(); - if (!spin_is_locked(lock)) - BUG(); -#endif - __asm__ __volatile__( - spin_unlock_string - ); -} - -#else - #define spin_unlock_string \ - "xchgb %b0, %1" \ - :"=q" (oldval), "=m" (lock->lock) \ - :"0" (oldval) : "memory" - -static inline void spin_unlock(spinlock_t *lock) -{ - char oldval = 1; -#if SPINLOCK_DEBUG - if (lock->magic != SPINLOCK_MAGIC) - BUG(); - if (!spin_is_locked(lock)) - BUG(); -#endif - __asm__ __volatile__( - spin_unlock_string - ); -} - -#endif + "movb $1,%0" static inline int spin_trylock(spinlock_t *lock) { @@ -131,6 +88,18 @@ printk("eip: %p\n", &&here); :"=m" (lock->lock) : : "memory"); } +static inline void spin_unlock(spinlock_t *lock) +{ +#if SPINLOCK_DEBUG + if (lock->magic != SPINLOCK_MAGIC) + BUG(); + if (!spin_is_locked(lock)) + BUG(); +#endif + __asm__ __volatile__( + spin_unlock_string + :"=m" (lock->lock) : : "memory"); +} /* * Read-write spinlocks, allowing multiple readers @@ -170,7 +139,7 @@ typedef struct { * Changed to use the same technique as rw semaphores. See * semaphore.h for details. 
-ben */ -/* the spinlock helpers are in arch/i386/kernel/semaphore.c */ +/* the spinlock helpers are in arch/x86_64/kernel/semaphore.S */ static inline void read_lock(rwlock_t *rw) { diff --git a/xen/include/asm-x86_64/string.h b/xen/include/asm-x86_64/string.h index 385da59b5c..875e0e2747 100644 --- a/xen/include/asm-x86_64/string.h +++ b/xen/include/asm-x86_64/string.h @@ -1,205 +1,14 @@ -#ifndef _I386_STRING_H_ -#define _I386_STRING_H_ +#ifndef _X86_64_STRING_H_ +#define _X86_64_STRING_H_ #ifdef __KERNEL__ -#include -/* - * On a 486 or Pentium, we are better off not using the - * byte string operations. But on a 386 or a PPro the - * byte string ops are faster than doing it by hand - * (MUCH faster on a Pentium). - * - * Also, the byte strings actually work correctly. Forget - * the i486 routines for now as they may be broken.. - */ -#if FIXED_486_STRING && defined(CONFIG_X86_USE_STRING_486) -#include -#else -/* - * This string-include defines all string functions as inline - * functions. Use gcc. It also assumes ds=es=data space, this should be - * normal. Most of the string-functions are rather heavily hand-optimized, - * see especially strtok,strstr,str[c]spn. They should work, but are not - * very easy to understand. Everything is done entirely within the register - * set, making the functions fast and clean. String instructions have been - * used through-out, making for "slightly" unclear code :-) - * - * NO Copyright (C) 1991, 1992 Linus Torvalds, - * consider these trivial functions to be PD. 
- */ +/* Written 2002 by Andi Kleen */ - -#define __HAVE_ARCH_STRCPY -static inline char * strcpy(char * dest,const char *src) -{ -int d0, d1, d2; -__asm__ __volatile__( - "1:\tlodsb\n\t" - "stosb\n\t" - "testb %%al,%%al\n\t" - "jne 1b" - : "=&S" (d0), "=&D" (d1), "=&a" (d2) - :"0" (src),"1" (dest) : "memory"); -return dest; -} - -#define __HAVE_ARCH_STRNCPY -static inline char * strncpy(char * dest,const char *src,size_t count) -{ -int d0, d1, d2, d3; -__asm__ __volatile__( - "1:\tdecl %2\n\t" - "js 2f\n\t" - "lodsb\n\t" - "stosb\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n\t" - "rep\n\t" - "stosb\n" - "2:" - : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3) - :"0" (src),"1" (dest),"2" (count) : "memory"); -return dest; -} - -#define __HAVE_ARCH_STRCAT -static inline char * strcat(char * dest,const char * src) -{ -int d0, d1, d2, d3; -__asm__ __volatile__( - "repne\n\t" - "scasb\n\t" - "decl %1\n" - "1:\tlodsb\n\t" - "stosb\n\t" - "testb %%al,%%al\n\t" - "jne 1b" - : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) - : "0" (src), "1" (dest), "2" (0), "3" (0xffffffff):"memory"); -return dest; -} - -#define __HAVE_ARCH_STRNCAT -static inline char * strncat(char * dest,const char * src,size_t count) -{ -int d0, d1, d2, d3; -__asm__ __volatile__( - "repne\n\t" - "scasb\n\t" - "decl %1\n\t" - "movl %8,%3\n" - "1:\tdecl %3\n\t" - "js 2f\n\t" - "lodsb\n\t" - "stosb\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n" - "2:\txorl %2,%2\n\t" - "stosb" - : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) - : "0" (src),"1" (dest),"2" (0),"3" (0xffffffff), "g" (count) - : "memory"); -return dest; -} - -#define __HAVE_ARCH_STRCMP -static inline int strcmp(const char * cs,const char * ct) -{ -int d0, d1; -register int __res; -__asm__ __volatile__( - "1:\tlodsb\n\t" - "scasb\n\t" - "jne 2f\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n\t" - "xorl %%eax,%%eax\n\t" - "jmp 3f\n" - "2:\tsbbl %%eax,%%eax\n\t" - "orb $1,%%al\n" - "3:" - :"=a" (__res), "=&S" (d0), "=&D" (d1) - :"1" (cs),"2" (ct)); -return 
__res; -} - -#define __HAVE_ARCH_STRNCMP -static inline int strncmp(const char * cs,const char * ct,size_t count) +/* Only used for special circumstances. Stolen from i386/string.h */ +static inline void * __inline_memcpy(void * to, const void * from, size_t n) { -register int __res; -int d0, d1, d2; -__asm__ __volatile__( - "1:\tdecl %3\n\t" - "js 2f\n\t" - "lodsb\n\t" - "scasb\n\t" - "jne 3f\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n" - "2:\txorl %%eax,%%eax\n\t" - "jmp 4f\n" - "3:\tsbbl %%eax,%%eax\n\t" - "orb $1,%%al\n" - "4:" - :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2) - :"1" (cs),"2" (ct),"3" (count)); -return __res; -} - -#define __HAVE_ARCH_STRCHR -static inline char * strchr(const char * s, int c) -{ -int d0; -register char * __res; -__asm__ __volatile__( - "movb %%al,%%ah\n" - "1:\tlodsb\n\t" - "cmpb %%ah,%%al\n\t" - "je 2f\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n\t" - "movl $1,%1\n" - "2:\tmovl %1,%0\n\t" - "decl %0" - :"=a" (__res), "=&S" (d0) : "1" (s),"0" (c)); -return __res; -} - -#define __HAVE_ARCH_STRRCHR -static inline char * strrchr(const char * s, int c) -{ -int d0, d1; -register char * __res; -__asm__ __volatile__( - "movb %%al,%%ah\n" - "1:\tlodsb\n\t" - "cmpb %%ah,%%al\n\t" - "jne 2f\n\t" - "leal -1(%%esi),%0\n" - "2:\ttestb %%al,%%al\n\t" - "jne 1b" - :"=g" (__res), "=&S" (d0), "=&a" (d1) :"0" (0),"1" (s),"2" (c)); -return __res; -} - -#define __HAVE_ARCH_STRLEN -static inline size_t strlen(const char * s) -{ -int d0; -register int __res; -__asm__ __volatile__( - "repne\n\t" - "scasb\n\t" - "notl %0\n\t" - "decl %0" - :"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffff)); -return __res; -} - -static inline void * __memcpy(void * to, const void * from, size_t n) -{ -int d0, d1, d2; +unsigned long d0, d1, d2; __asm__ __volatile__( "rep ; movsl\n\t" "testb $2,%b4\n\t" @@ -215,286 +24,27 @@ __asm__ __volatile__( return (to); } -/* - * This looks horribly ugly, but the compiler can optimize it totally, - * as the count is constant. 
- */ -static inline void * __constant_memcpy(void * to, const void * from, size_t n) -{ - switch (n) { - case 0: - return to; - case 1: - *(unsigned char *)to = *(const unsigned char *)from; - return to; - case 2: - *(unsigned short *)to = *(const unsigned short *)from; - return to; - case 3: - *(unsigned short *)to = *(const unsigned short *)from; - *(2+(unsigned char *)to) = *(2+(const unsigned char *)from); - return to; - case 4: - *(unsigned long *)to = *(const unsigned long *)from; - return to; - case 6: /* for Ethernet addresses */ - *(unsigned long *)to = *(const unsigned long *)from; - *(2+(unsigned short *)to) = *(2+(const unsigned short *)from); - return to; - case 8: - *(unsigned long *)to = *(const unsigned long *)from; - *(1+(unsigned long *)to) = *(1+(const unsigned long *)from); - return to; - case 12: - *(unsigned long *)to = *(const unsigned long *)from; - *(1+(unsigned long *)to) = *(1+(const unsigned long *)from); - *(2+(unsigned long *)to) = *(2+(const unsigned long *)from); - return to; - case 16: - *(unsigned long *)to = *(const unsigned long *)from; - *(1+(unsigned long *)to) = *(1+(const unsigned long *)from); - *(2+(unsigned long *)to) = *(2+(const unsigned long *)from); - *(3+(unsigned long *)to) = *(3+(const unsigned long *)from); - return to; - case 20: - *(unsigned long *)to = *(const unsigned long *)from; - *(1+(unsigned long *)to) = *(1+(const unsigned long *)from); - *(2+(unsigned long *)to) = *(2+(const unsigned long *)from); - *(3+(unsigned long *)to) = *(3+(const unsigned long *)from); - *(4+(unsigned long *)to) = *(4+(const unsigned long *)from); - return to; - } -#define COMMON(x) \ -__asm__ __volatile__( \ - "rep ; movsl" \ - x \ - : "=&c" (d0), "=&D" (d1), "=&S" (d2) \ - : "0" (n/4),"1" ((long) to),"2" ((long) from) \ - : "memory"); -{ - int d0, d1, d2; - switch (n % 4) { - case 0: COMMON(""); return to; - case 1: COMMON("\n\tmovsb"); return to; - case 2: COMMON("\n\tmovsw"); return to; - default: COMMON("\n\tmovsw\n\tmovsb"); 
return to; - } -} - -#undef COMMON -} +/* Even with __builtin_ the compiler may decide to use the out of line + function. */ -#define __HAVE_ARCH_MEMCPY +#define __HAVE_ARCH_MEMCPY 1 +extern void *__memcpy(void *to, const void *from, size_t len); +#define memcpy(dst,src,len) \ + ({ size_t __len = (len); \ + void *__ret; \ + if (__builtin_constant_p(len) && __len >= 64) \ + __ret = __memcpy((dst),(src),__len); \ + else \ + __ret = __builtin_memcpy((dst),(src),__len); \ + __ret; }) -#define memcpy(t, f, n) \ -(__builtin_constant_p(n) ? \ - __constant_memcpy((t),(f),(n)) : \ - __memcpy((t),(f),(n))) - - -/* - * struct_cpy(x,y), copy structure *x into (matching structure) *y. - * - * We get link-time errors if the structure sizes do not match. - * There is no runtime overhead, it's all optimized away at - * compile time. - */ -//extern void __struct_cpy_bug (void); - -/* -#define struct_cpy(x,y) \ -({ \ - if (sizeof(*(x)) != sizeof(*(y))) \ - __struct_cpy_bug; \ - memcpy(x, y, sizeof(*(x))); \ -}) -*/ - -#define __HAVE_ARCH_MEMMOVE -static inline void * memmove(void * dest,const void * src, size_t n) -{ -int d0, d1, d2; -if (dest #include -#ifdef CONFIG_MELAN -# define CLOCK_TICK_RATE 1189200 /* AMD Elan has different frequency! */ -#else -# define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ -#endif - -#define CLOCK_TICK_FACTOR 20 /* Factor of both 1000000 and CLOCK_TICK_RATE */ -#define FINETUNE ((((((long)LATCH * HZ - CLOCK_TICK_RATE) << SHIFT_HZ) * \ - (1000000/CLOCK_TICK_FACTOR) / (CLOCK_TICK_RATE/CLOCK_TICK_FACTOR)) \ - << (SHIFT_SCALE-SHIFT_HZ)) / HZ) +#define CLOCK_TICK_RATE (vxtime_hz) +#define FINETUNE ((((((long)LATCH * HZ - CLOCK_TICK_RATE) << SHIFT_HZ) * \ + 1000000 / CLOCK_TICK_RATE) << (SHIFT_SCALE - SHIFT_HZ)) / HZ) /* - * Standard way to access the cycle counter on i586+ CPUs. - * Currently only used on SMP. - * - * If you really have a SMP machine with i486 chips or older, - * compile for that, and this will just always return zero. 
- * That's ok, it just means that the nicer scheduling heuristics - * won't work for you. - * * We only use the low 32 bits, and we'd simply better make sure * that we reschedule before that wraps. Scheduling at least every * four billion cycles just basically sounds like a good idea, - * regardless of how fast the machine is. + * regardless of how fast the machine is. */ typedef unsigned long long cycles_t; @@ -40,19 +25,43 @@ extern cycles_t cacheflush_time; static inline cycles_t get_cycles (void) { -#ifndef CONFIG_X86_TSC - return 0; -#else unsigned long long ret; - rdtscll(ret); return ret; -#endif } -extern unsigned long cpu_khz; +extern unsigned int cpu_khz; + +/* + * Documentation on HPET can be found at: + * http://www.intel.com/ial/home/sp/pcmmspec.htm + * ftp://download.intel.com/ial/home/sp/mmts098.pdf + */ + +#define HPET_ID 0x000 +#define HPET_PERIOD 0x004 +#define HPET_CFG 0x010 +#define HPET_STATUS 0x020 +#define HPET_COUNTER 0x0f0 +#define HPET_T0_CFG 0x100 +#define HPET_T0_CMP 0x108 +#define HPET_T0_ROUTE 0x110 + +#define HPET_ID_VENDOR 0xffff0000 +#define HPET_ID_LEGSUP 0x00008000 +#define HPET_ID_NUMBER 0x00000f00 +#define HPET_ID_REV 0x000000ff + +#define HPET_CFG_ENABLE 0x001 +#define HPET_CFG_LEGACY 0x002 + +#define HPET_T0_ENABLE 0x004 +#define HPET_T0_PERIODIC 0x008 +#define HPET_T0_SETVAL 0x040 +#define HPET_T0_32BIT 0x100 -#define vxtime_lock() do {} while (0) -#define vxtime_unlock() do {} while (0) +/*extern struct vxtime_data vxtime; */ +extern unsigned long vxtime_hz; +extern unsigned long hpet_address; #endif diff --git a/xen/include/asm-x86_64/types.h b/xen/include/asm-x86_64/types.h index 2bd0f258b9..1b865c48fc 100644 --- a/xen/include/asm-x86_64/types.h +++ b/xen/include/asm-x86_64/types.h @@ -1,5 +1,5 @@ -#ifndef _I386_TYPES_H -#define _I386_TYPES_H +#ifndef _X86_64_TYPES_H +#define _X86_64_TYPES_H typedef unsigned short umode_t; @@ -17,10 +17,8 @@ typedef unsigned short __u16; typedef __signed__ int __s32; typedef unsigned int 
__u32; -#if defined(__GNUC__) && !defined(__STRICT_ANSI__) typedef __signed__ long long __s64; typedef unsigned long long __u64; -#endif #include @@ -36,15 +34,9 @@ typedef unsigned int u32; typedef signed long long s64; typedef unsigned long long u64; -#define BITS_PER_LONG 32 - -/* DMA addresses come in generic and 64-bit flavours. */ +#define BITS_PER_LONG 64 -#ifdef CONFIG_HIGHMEM -typedef u64 dma_addr_t; -#else -typedef u32 dma_addr_t; -#endif typedef u64 dma64_addr_t; +typedef u64 dma_addr_t; #endif diff --git a/xen/include/asm-x86_64/uaccess.h b/xen/include/asm-x86_64/uaccess.h index ba19cfb2b3..3e9bc4a9bc 100644 --- a/xen/include/asm-x86_64/uaccess.h +++ b/xen/include/asm-x86_64/uaccess.h @@ -1,13 +1,13 @@ -#ifndef __i386_UACCESS_H -#define __i386_UACCESS_H +#ifndef __X86_64_UACCESS_H +#define __X86_64_UACCESS_H /* * User space memory access functions */ #include -#include #include #include +#include #include #define VERIFY_READ 0 @@ -23,8 +23,7 @@ #define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) - -#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF) +#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFFFFFFFFFF) #define USER_DS MAKE_MM_SEG(PAGE_OFFSET) #define get_ds() (KERNEL_DS) @@ -33,23 +32,22 @@ #define segment_eq(a,b) ((a).seg == (b).seg) -extern int __verify_write(const void *, unsigned long); - -#define __addr_ok(addr) ((unsigned long)(addr) < (current->addr_limit.seg)) +#define __addr_ok(addr) (!((unsigned long)(addr) & (current->addr_limit.seg))) /* - * Uhhuh, this needs 33-bit arithmetic. We have a carry.. + * Uhhuh, this needs 65-bit arithmetic. We have a carry.. 
*/ -#define __range_ok(addr,size) ({ \ +#define __range_not_ok(addr,size) ({ \ unsigned long flag,sum; \ - asm("addl %3,%1 ; sbbl %0,%0; cmpl %1,%4; sbbl $0,%0" \ + asm("# range_ok\n\r" \ + "addq %3,%1 ; sbbq %0,%0 ; cmpq %1,%4 ; sbbq $0,%0" \ :"=&r" (flag), "=r" (sum) \ - :"1" (addr),"g" ((int)(size)),"g" (current->addr_limit.seg)); \ + :"1" (addr),"g" ((long)(size)),"g" (current->addr_limit.seg)); \ flag; }) -#define access_ok(type,addr,size) (__range_ok(addr,size) == 0) +#define access_ok(type,addr,size) (__range_not_ok(addr,size) == 0) -static inline int verify_area(int type, const void * addr, unsigned long size) +extern inline int verify_area(int type, const void * addr, unsigned long size) { return access_ok(type,addr,size) ? 0 : -EFAULT; } @@ -73,9 +71,6 @@ struct exception_table_entry unsigned long insn, fixup; }; -/* Returns 0 if exception not found and fixup otherwise. */ -extern unsigned long search_exception_table(unsigned long); - /* * These are the main single-value transfer routines. They automatically @@ -84,7 +79,7 @@ extern unsigned long search_exception_table(unsigned long); * This gets kind of ugly. We want to return _two_ values in "get_user()" * and yet we don't want to do any pointers, because that is too much * of a performance impact. Thus we have a few rather ugly macros here, - * and hide all the uglyness from the user. + * and hide all the ugliness from the user. 
 * * The "__xxx" versions of the user access functions are versions that * do not verify the address space, that must have been done previously * @@ -95,24 +90,29 @@ extern void search_exception_table(unsigned long); extern void __get_user_1(void); extern void __get_user_2(void); extern void __get_user_4(void); +extern void __get_user_8(void); #define __get_user_x(size,ret,x,ptr) \ __asm__ __volatile__("call __get_user_" #size \ :"=a" (ret),"=d" (x) \ - :"0" (ptr)) + :"0" (ptr) \ + :"rbx") /* Careful: we have to cast the result to the type of the pointer for sign reasons */ #define get_user(x,ptr) \ -({ int __ret_gu=1,__val_gu; \ +({ long __val_gu; \ + int __ret_gu=1; \ switch(sizeof (*(ptr))) { \ - case 1: __ret_gu=copy_from_user(&__val_gu,ptr,1); break; \ - case 2: __ret_gu=copy_from_user(&__val_gu,ptr,2); break; \ - case 4: __ret_gu=copy_from_user(&__val_gu,ptr,4); break; \ - default: __ret_gu=copy_from_user(&__val_gu,ptr,8); break; \ - /*case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break;*/ \ - /*case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break;*/ \ - /*case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break;*/ \ - /*default: __get_user_x(X,__ret_gu,__val_gu,ptr); break;*/ \ + case 1: __ret_gu=copy_from_user(&__val_gu,ptr,1);break; \ + case 2: __ret_gu=copy_from_user(&__val_gu,ptr,2);break; \ + case 4: __ret_gu=copy_from_user(&__val_gu,ptr,4);break; \ + case 8: __ret_gu=copy_from_user(&__val_gu,ptr,8);break; \ + default: __ret_gu=copy_from_user(&__val_gu,ptr,sizeof(*(ptr)));break;\ + /*case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break;*/ \ + /*case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break;*/ \ + /*case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break;*/ \ + /*case 8: __get_user_x(8,__ret_gu,__val_gu,ptr); break;*/ \ + /*default: __get_user_bad(); break;*/ \ } \ (x) = (__typeof__(*(ptr)))__val_gu; \ __ret_gu; \ @@ -125,6 +125,12 @@ extern void __put_user_8(void); extern void __put_user_bad(void); +#define __put_user_x(size,ret,x,ptr) \ + __asm__ 
__volatile__("call __put_user_" #size \ + :"=a" (ret) \ + :"0" (ptr),"d" (x) \ + :"rbx") + #define put_user(x,ptr) \ __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) @@ -135,7 +141,7 @@ extern void __put_user_bad(void); #define __put_user_nocheck(x,ptr,size) \ ({ \ - long __pu_err; \ + int __pu_err; \ __put_user_size((x),(ptr),(size),__pu_err); \ __pu_err; \ }) @@ -143,42 +149,26 @@ extern void __put_user_bad(void); #define __put_user_check(x,ptr,size) \ ({ \ - long __pu_err = -EFAULT; \ + int __pu_err = -EFAULT; \ __typeof__(*(ptr)) *__pu_addr = (ptr); \ if (access_ok(VERIFY_WRITE,__pu_addr,size)) \ __put_user_size((x),__pu_addr,(size),__pu_err); \ __pu_err; \ -}) - -#define __put_user_u64(x, addr, err) \ - __asm__ __volatile__( \ - "1: movl %%eax,0(%2)\n" \ - "2: movl %%edx,4(%2)\n" \ - "3:\n" \ - ".section .fixup,\"ax\"\n" \ - "4: movl %3,%0\n" \ - " jmp 3b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 1b,4b\n" \ - " .long 2b,4b\n" \ - ".previous" \ - : "=r"(err) \ - : "A" (x), "r" (addr), "i"(-EFAULT), "0"(err)) +}) #define __put_user_size(x,ptr,size,retval) \ do { \ retval = 0; \ switch (size) { \ - case 1: __put_user_asm(x,ptr,retval,"b","b","iq"); break; \ - case 2: __put_user_asm(x,ptr,retval,"w","w","ir"); break; \ - case 4: __put_user_asm(x,ptr,retval,"l","","ir"); break; \ - case 8: __put_user_u64(x,ptr,retval); break; \ + case 1: __put_user_asm(x,ptr,retval,"b","b","iq",-EFAULT); break;\ + case 2: __put_user_asm(x,ptr,retval,"w","w","ir",-EFAULT); break;\ + case 4: __put_user_asm(x,ptr,retval,"l","k","ir",-EFAULT); break;\ + case 8: __put_user_asm(x,ptr,retval,"q","","ir",-EFAULT); break;\ default: __put_user_bad(); \ } \ } while (0) +/* FIXME: this hack is definitely wrong -AK */ struct __large_struct { unsigned long buf[100]; }; #define __m(x) (*(struct __large_struct *)(x)) @@ -187,414 +177,138 @@ struct __large_struct { unsigned long buf[100]; }; * we do not write to any memory gcc knows about, so 
there are no * aliasing issues. */ -#define __put_user_asm(x, addr, err, itype, rtype, ltype) \ - __asm__ __volatile__( \ - "1: mov"itype" %"rtype"1,%2\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl %3,%0\n" \ - " jmp 2b\n" \ +#define __put_user_asm(x, addr, err, itype, rtype, ltype, errno) \ + __asm__ __volatile__( \ + "1: mov"itype" %"rtype"1,%2\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: mov %3,%0\n" \ + " jmp 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 1b,3b\n" \ - ".previous" \ - : "=r"(err) \ - : ltype (x), "m"(__m(addr)), "i"(-EFAULT), "0"(err)) + ".section __ex_table,\"a\"\n" \ + " .align 8\n" \ + " .quad 1b,3b\n" \ + ".previous" \ + : "=r"(err) \ + : ltype (x), "m"(__m(addr)), "i"(errno), "0"(err)) #define __get_user_nocheck(x,ptr,size) \ ({ \ - long __gu_err, __gu_val; \ + int __gu_err; \ + long __gu_val; \ __get_user_size(__gu_val,(ptr),(size),__gu_err); \ (x) = (__typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) -extern long __get_user_bad(void); +extern int __get_user_bad(void); #define __get_user_size(x,ptr,size,retval) \ do { \ retval = 0; \ switch (size) { \ - case 1: __get_user_asm(x,ptr,retval,"b","b","=q"); break; \ - case 2: __get_user_asm(x,ptr,retval,"w","w","=r"); break; \ - case 4: __get_user_asm(x,ptr,retval,"l","","=r"); break; \ + case 1: __get_user_asm(x,ptr,retval,"b","b","=q",-EFAULT); break;\ + case 2: __get_user_asm(x,ptr,retval,"w","w","=r",-EFAULT); break;\ + case 4: __get_user_asm(x,ptr,retval,"l","k","=r",-EFAULT); break;\ + case 8: __get_user_asm(x,ptr,retval,"q","","=r",-EFAULT); break;\ default: (x) = __get_user_bad(); \ } \ } while (0) -#define __get_user_asm(x, addr, err, itype, rtype, ltype) \ +#define __get_user_asm(x, addr, err, itype, rtype, ltype, errno) \ __asm__ __volatile__( \ "1: mov"itype" %2,%"rtype"1\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ - "3: movl %3,%0\n" \ + "3: mov %3,%0\n" \ " xor"itype" %"rtype"1,%"rtype"1\n" \ " jmp 2b\n" \ ".previous\n" \ 
".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 1b,3b\n" \ + " .align 8\n" \ + " .quad 1b,3b\n" \ ".previous" \ : "=r"(err), ltype (x) \ - : "m"(__m(addr)), "i"(-EFAULT), "0"(err)) - + : "m"(__m(addr)), "i"(errno), "0"(err)) /* * Copy To/From Userspace */ -/* Generic arbitrary sized copy. */ -#define __copy_user(to,from,size) \ -do { \ - int __d0, __d1; \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - " movl %3,%0\n" \ - "1: rep; movsb\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: lea 0(%3,%0,4),%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,2b\n" \ - ".previous" \ - : "=&c"(size), "=&D" (__d0), "=&S" (__d1) \ - : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \ - : "memory"); \ -} while (0) - -#define __copy_user_zeroing(to,from,size) \ -do { \ - int __d0, __d1; \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - " movl %3,%0\n" \ - "1: rep; movsb\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: lea 0(%3,%0,4),%0\n" \ - "4: pushl %0\n" \ - " pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " rep; stosb\n" \ - " popl %%eax\n" \ - " popl %0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,4b\n" \ - ".previous" \ - : "=&c"(size), "=&D" (__d0), "=&S" (__d1) \ - : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \ - : "memory"); \ -} while (0) - -/* We let the __ versions of copy_from/to_user inline, because they're often - * used in fast paths and have only a small space overhead. - */ -static inline unsigned long -__generic_copy_from_user_nocheck(void *to, const void *from, unsigned long n) -{ - __copy_user_zeroing(to,from,n); - return n; -} - -static inline unsigned long -__generic_copy_to_user_nocheck(void *to, const void *from, unsigned long n) -{ - __copy_user(to,from,n); - return n; -} - - -/* Optimize just a little bit when we know the size of the move. 
*/ -#define __constant_copy_user(to, from, size) \ -do { \ - int __d0, __d1; \ - switch (size & 3) { \ - default: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1:\n" \ - ".section .fixup,\"ax\"\n" \ - "2: shl $2,%0\n" \ - " jmp 1b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,2b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - case 1: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1: movsb\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: shl $2,%0\n" \ - "4: incl %0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,4b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - case 2: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1: movsw\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: shl $2,%0\n" \ - "4: addl $2,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,4b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - case 3: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1: movsw\n" \ - "2: movsb\n" \ - "3:\n" \ - ".section .fixup,\"ax\"\n" \ - "4: shl $2,%0\n" \ - "5: addl $2,%0\n" \ - "6: incl %0\n" \ - " jmp 3b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,4b\n" \ - " .long 1b,5b\n" \ - " .long 2b,6b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - } \ -} while (0) - -/* Optimize just a little bit when we know the size of the move. 
*/ -#define __constant_copy_user_zeroing(to, from, size) \ -do { \ - int __d0, __d1; \ - switch (size & 3) { \ - default: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1:\n" \ - ".section .fixup,\"ax\"\n" \ - "2: pushl %0\n" \ - " pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " rep; stosl\n" \ - " popl %%eax\n" \ - " popl %0\n" \ - " shl $2,%0\n" \ - " jmp 1b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,2b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - case 1: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1: movsb\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: pushl %0\n" \ - " pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " rep; stosl\n" \ - " stosb\n" \ - " popl %%eax\n" \ - " popl %0\n" \ - " shl $2,%0\n" \ - " incl %0\n" \ - " jmp 2b\n" \ - "4: pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " stosb\n" \ - " popl %%eax\n" \ - " incl %0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,4b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - case 2: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1: movsw\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: pushl %0\n" \ - " pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " rep; stosl\n" \ - " stosw\n" \ - " popl %%eax\n" \ - " popl %0\n" \ - " shl $2,%0\n" \ - " addl $2,%0\n" \ - " jmp 2b\n" \ - "4: pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " stosw\n" \ - " popl %%eax\n" \ - " addl $2,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,4b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - case 3: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1: 
movsw\n" \ - "2: movsb\n" \ - "3:\n" \ - ".section .fixup,\"ax\"\n" \ - "4: pushl %0\n" \ - " pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " rep; stosl\n" \ - " stosw\n" \ - " stosb\n" \ - " popl %%eax\n" \ - " popl %0\n" \ - " shl $2,%0\n" \ - " addl $3,%0\n" \ - " jmp 2b\n" \ - "5: pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " stosw\n" \ - " stosb\n" \ - " popl %%eax\n" \ - " addl $3,%0\n" \ - " jmp 2b\n" \ - "6: pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " stosb\n" \ - " popl %%eax\n" \ - " incl %0\n" \ - " jmp 3b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,4b\n" \ - " .long 1b,5b\n" \ - " .long 2b,6b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - } \ -} while (0) - -unsigned long __generic_copy_to_user(void *, const void *, unsigned long); -unsigned long __generic_copy_from_user(void *, const void *, unsigned long); - -static inline unsigned long -__constant_copy_to_user(void *to, const void *from, unsigned long n) -{ - prefetch(from); - if (access_ok(VERIFY_WRITE, to, n)) - __constant_copy_user(to,from,n); - return n; -} - -static inline unsigned long -__constant_copy_from_user(void *to, const void *from, unsigned long n) -{ - if (access_ok(VERIFY_READ, from, n)) - __constant_copy_user_zeroing(to,from,n); - else - memset(to, 0, n); - return n; -} - -static inline unsigned long -__constant_copy_to_user_nocheck(void *to, const void *from, unsigned long n) -{ - __constant_copy_user(to,from,n); - return n; -} - -static inline unsigned long -__constant_copy_from_user_nocheck(void *to, const void *from, unsigned long n) -{ - __constant_copy_user_zeroing(to,from,n); - return n; -} - -#define copy_to_user(to,from,n) \ - (__builtin_constant_p(n) ? \ - __constant_copy_to_user((to),(from),(n)) : \ - __generic_copy_to_user((to),(from),(n))) - -#define copy_from_user(to,from,n) \ - (__builtin_constant_p(n) ? 
\ - __constant_copy_from_user((to),(from),(n)) : \ - __generic_copy_from_user((to),(from),(n))) - -#define __copy_to_user(to,from,n) \ - (__builtin_constant_p(n) ? \ - __constant_copy_to_user_nocheck((to),(from),(n)) : \ - __generic_copy_to_user_nocheck((to),(from),(n))) - -#define __copy_from_user(to,from,n) \ - (__builtin_constant_p(n) ? \ - __constant_copy_from_user_nocheck((to),(from),(n)) : \ - __generic_copy_from_user_nocheck((to),(from),(n))) +/* Handles exceptions in both to and from, but doesn't do access_ok */ +extern unsigned long copy_user_generic(void *to, const void *from, unsigned len); + +extern unsigned long copy_to_user(void *to, const void *from, unsigned len); +extern unsigned long copy_from_user(void *to, const void *from, unsigned len); + +static inline int __copy_from_user(void *dst, const void *src, unsigned size) +{ + if (!__builtin_constant_p(size)) + return copy_user_generic(dst,src,size); + int ret = 0; + switch (size) { + case 1:__get_user_asm(*(u8*)dst,(u8 *)src,ret,"b","b","=q",1); + return ret; + case 2:__get_user_asm(*(u16*)dst,(u16*)src,ret,"w","w","=r",2); + return ret; + case 4:__get_user_asm(*(u32*)dst,(u32*)src,ret,"l","k","=r",4); + return ret; + case 8:__get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",8); + return ret; + case 10: + __get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",16); + if (ret) return ret; + __get_user_asm(*(u16*)(8+dst),(u16*)(8+src),ret,"w","w","=r",2); + return ret; + case 16: + __get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",16); + if (ret) return ret; + __get_user_asm(*(u64*)(8+dst),(u64*)(8+src),ret,"q","","=r",8); + return ret; + default: + return copy_user_generic(dst,src,size); + } +} + +static inline int __copy_to_user(void *dst, const void *src, unsigned size) +{ + if (!__builtin_constant_p(size)) + return copy_user_generic(dst,src,size); + int ret = 0; + switch (size) { + case 1:__put_user_asm(*(u8*)src,(u8 *)dst,ret,"b","b","iq",1); + return ret; + case 
2:__put_user_asm(*(u16*)src,(u16*)dst,ret,"w","w","ir",2); + return ret; + case 4:__put_user_asm(*(u32*)src,(u32*)dst,ret,"l","k","ir",4); + return ret; + case 8:__put_user_asm(*(u64*)src,(u64*)dst,ret,"q","","ir",8); + return ret; + case 10: + __put_user_asm(*(u64*)src,(u64*)dst,ret,"q","","ir",10); + if (ret) return ret; + asm("":::"memory"); + __put_user_asm(4[(u16*)src],4+(u16*)dst,ret,"w","w","ir",2); + return ret; + case 16: + __put_user_asm(*(u64*)src,(u64*)dst,ret,"q","","ir",16); + if (ret) return ret; + asm("":::"memory"); + __put_user_asm(1[(u64*)src],1+(u64*)dst,ret,"q","","ir",8); + return ret; + default: + return copy_user_generic(dst,src,size); + } +} long strncpy_from_user(char *dst, const char *src, long count); long __strncpy_from_user(char *dst, const char *src, long count); -#define strlen_user(str) strnlen_user(str, ~0UL >> 1) long strnlen_user(const char *str, long n); +long strlen_user(const char *str); unsigned long clear_user(void *mem, unsigned long len); unsigned long __clear_user(void *mem, unsigned long len); -#endif /* __i386_UACCESS_H */ +extern unsigned long search_exception_table(unsigned long); + +#endif /* __X86_64_UACCESS_H */ diff --git a/xen/include/asm-x86_64/unaligned.h b/xen/include/asm-x86_64/unaligned.h index 7acd795762..d4bf78dc6f 100644 --- a/xen/include/asm-x86_64/unaligned.h +++ b/xen/include/asm-x86_64/unaligned.h @@ -1,8 +1,8 @@ -#ifndef __I386_UNALIGNED_H -#define __I386_UNALIGNED_H +#ifndef __X8664_UNALIGNED_H +#define __X8664_UNALIGNED_H /* - * The i386 can do unaligned accesses itself. + * The x86-64 can do unaligned accesses itself. 
* * The strange macros are there to make sure these can't * be misused in a way that makes them not work on other diff --git a/xen/include/hypervisor-ifs/dom0_ops.h b/xen/include/hypervisor-ifs/dom0_ops.h index 5e47e64c0e..e2220295be 100644 --- a/xen/include/hypervisor-ifs/dom0_ops.h +++ b/xen/include/hypervisor-ifs/dom0_ops.h @@ -19,29 +19,6 @@ */ #define DOM0_INTERFACE_VERSION 0xAAAA0008 -/* - * The following is all CPU context. Note that the i387_ctxt block is filled - * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. - */ -typedef struct full_execution_context_st -{ -#define ECF_I387_VALID (1<<0) - unsigned long flags; - execution_context_t i386_ctxt; /* User-level CPU registers */ - char i387_ctxt[256]; /* User-level FPU registers */ - trap_info_t trap_ctxt[256]; /* Virtual IDT */ - unsigned int fast_trap_idx; /* "Fast trap" vector offset */ - unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ - unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ - unsigned long ring1_ss, ring1_esp; /* Virtual TSS (only SS1/ESP1) */ - unsigned long pt_base; /* CR3 (pagetable base) */ - unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */ - unsigned long event_callback_cs; /* CS:EIP of event callback */ - unsigned long event_callback_eip; - unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */ - unsigned long failsafe_callback_eip; -} full_execution_context_t; - #define MAX_CMD_LEN 256 #define MAX_DOMAIN_NAME 16 diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h index ee950c01e1..bf61d7c932 100644 --- a/xen/include/hypervisor-ifs/hypervisor-if.h +++ b/xen/include/hypervisor-ifs/hypervisor-if.h @@ -7,33 +7,7 @@ #ifndef __HYPERVISOR_IF_H__ #define __HYPERVISOR_IF_H__ -/* - * SEGMENT DESCRIPTOR TABLES - */ -/* - * A number of GDT entries are reserved by Xen. 
These are not situated at the - * start of the GDT because some stupid OSes export hard-coded selector values - * in their ABI. These hard-coded values are always near the start of the GDT, - * so Xen places itself out of the way. - * - * NB. The reserved range is inclusive (that is, both FIRST_RESERVED_GDT_ENTRY - * and LAST_RESERVED_GDT_ENTRY are reserved). - */ -#define NR_RESERVED_GDT_ENTRIES 40 -#define FIRST_RESERVED_GDT_ENTRY 256 -#define LAST_RESERVED_GDT_ENTRY \ - (FIRST_RESERVED_GDT_ENTRY + NR_RESERVED_GDT_ENTRIES - 1) - -/* - * These flat segments are in the Xen-private section of every GDT. Since these - * are also present in the initial GDT, many OSes will be able to avoid - * installing their own GDT. - */ -#define FLAT_RING1_CS 0x0819 /* GDT index 259 */ -#define FLAT_RING1_DS 0x0821 /* GDT index 260 */ -#define FLAT_RING3_CS 0x082b /* GDT index 261 */ -#define FLAT_RING3_DS 0x0833 /* GDT index 262 */ - +#include "if-arch/hypervisor-if-arch.h" /* * HYPERVISOR "SYSTEM CALLS" @@ -65,10 +39,6 @@ #define __HYPERVISOR_xen_version 22 #define __HYPERVISOR_serial_io 23 -/* And the trap vector is... */ -#define TRAP_INSTR "int $0x82" - - /* * MULTICALLS * @@ -76,7 +46,7 @@ * (BYTES_PER_MULTICALL_ENTRY). Each is of the form (op, arg1, ..., argN) * where each element of the tuple is a machine word. */ -#define BYTES_PER_MULTICALL_ENTRY 32 +#define ARGS_PER_MULTICALL_ENTRY 8 /* EVENT MESSAGES @@ -110,15 +80,6 @@ #define _EVENT_VBD_UPD 8 #define _EVENT_CONSOLE 9 /* This is only for domain-0 initial console. */ -/* - * Virtual addresses beyond this are not modifiable by guest OSes. The - * machine->physical mapping table starts at this address, read-only. - */ -#define HYPERVISOR_VIRT_START (0xFC000000UL) -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) -#endif - /* * MMU_XXX: specified in least 2 bits of 'ptr' field. 
These bits are masked @@ -186,21 +147,6 @@ typedef u64 domid_t; #include "network.h" #include "block.h" -/* - * Send an array of these to HYPERVISOR_set_trap_table() - */ -#define TI_GET_DPL(_ti) ((_ti)->flags & 3) -#define TI_GET_IF(_ti) ((_ti)->flags & 4) -#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl)) -#define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2)) -typedef struct trap_info_st -{ - unsigned char vector; /* exception vector */ - unsigned char flags; /* 0-3: privilege level; 4: clear event enable? */ - unsigned short cs; /* code selector */ - unsigned long address; /* code address */ -} trap_info_t; - /* * Send an array of these to HYPERVISOR_mmu_update() */ @@ -218,27 +164,6 @@ typedef struct unsigned long args[7]; } multicall_entry_t; -typedef struct -{ - unsigned long ebx; - unsigned long ecx; - unsigned long edx; - unsigned long esi; - unsigned long edi; - unsigned long ebp; - unsigned long eax; - unsigned long ds; - unsigned long es; - unsigned long fs; - unsigned long gs; - unsigned long _unused; - unsigned long eip; - unsigned long cs; - unsigned long eflags; - unsigned long esp; - unsigned long ss; -} execution_context_t; - /* * Xen/guestos shared data -- pointer provided in start_info. * NB. We expect that this struct is smaller than a page. @@ -285,7 +210,6 @@ typedef struct shared_info_st { * Domain Virtual Time. Domains can access Cycle counter time directly. */ - unsigned int rdtsc_bitshift; /* tsc_timestamp uses N:N+31 of TSC. */ u64 cpu_freq; /* CPU frequency (Hz). */ /* @@ -310,6 +234,7 @@ typedef struct shared_info_st { * Allow a domain to specify a timeout value in system time and * domain virtual time. 
*/ + u64 wall_timeout; u64 domain_timeout; @@ -322,6 +247,7 @@ typedef struct shared_info_st { net_idx_t net_idx[MAX_DOMAIN_VIFS]; execution_context_t execution_context; + arch_shared_info_t arch; } shared_info_t; diff --git a/xen/include/hypervisor-ifs/if-i386/hypervisor-if-arch.h b/xen/include/hypervisor-ifs/if-i386/hypervisor-if-arch.h index ee950c01e1..e0e6594f8c 100644 --- a/xen/include/hypervisor-ifs/if-i386/hypervisor-if-arch.h +++ b/xen/include/hypervisor-ifs/if-i386/hypervisor-if-arch.h @@ -1,11 +1,11 @@ /****************************************************************************** - * hypervisor-if.h + * hypervisor-if-i386.h * - * Interface to Xeno hypervisor. + * i386 32-bit Interface to Xeno hypervisor. */ -#ifndef __HYPERVISOR_IF_H__ -#define __HYPERVISOR_IF_H__ +#ifndef __HYPERVISOR_IF_I386_H__ +#define __HYPERVISOR_IF_I386_H__ /* * SEGMENT DESCRIPTOR TABLES @@ -24,6 +24,7 @@ #define LAST_RESERVED_GDT_ENTRY \ (FIRST_RESERVED_GDT_ENTRY + NR_RESERVED_GDT_ENTRIES - 1) + /* * These flat segments are in the Xen-private section of every GDT. Since these * are also present in the initial GDT, many OSes will be able to avoid @@ -34,82 +35,10 @@ #define FLAT_RING3_CS 0x082b /* GDT index 261 */ #define FLAT_RING3_DS 0x0833 /* GDT index 262 */ - -/* - * HYPERVISOR "SYSTEM CALLS" - */ - -/* EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5. 
*/ -#define __HYPERVISOR_set_trap_table 0 -#define __HYPERVISOR_mmu_update 1 -#define __HYPERVISOR_console_write 2 /* DEPRECATED */ -#define __HYPERVISOR_set_gdt 3 -#define __HYPERVISOR_stack_switch 4 -#define __HYPERVISOR_set_callbacks 5 -#define __HYPERVISOR_net_io_op 6 -#define __HYPERVISOR_fpu_taskswitch 7 -#define __HYPERVISOR_sched_op 8 -#define __HYPERVISOR_dom0_op 9 -#define __HYPERVISOR_network_op 10 -#define __HYPERVISOR_block_io_op 11 -#define __HYPERVISOR_set_debugreg 12 -#define __HYPERVISOR_get_debugreg 13 -#define __HYPERVISOR_update_descriptor 14 -#define __HYPERVISOR_set_fast_trap 15 -#define __HYPERVISOR_dom_mem_op 16 -#define __HYPERVISOR_multicall 17 -#define __HYPERVISOR_kbd_op 18 -#define __HYPERVISOR_update_va_mapping 19 -#define __HYPERVISOR_set_timer_op 20 -#define __HYPERVISOR_event_channel_op 21 -#define __HYPERVISOR_xen_version 22 -#define __HYPERVISOR_serial_io 23 - /* And the trap vector is... */ #define TRAP_INSTR "int $0x82" -/* - * MULTICALLS - * - * Multicalls are listed in an array, with each element being a fixed size - * (BYTES_PER_MULTICALL_ENTRY). Each is of the form (op, arg1, ..., argN) - * where each element of the tuple is a machine word. - */ -#define BYTES_PER_MULTICALL_ENTRY 32 - - -/* EVENT MESSAGES - * - * Here, as in the interrupts to the guestos, additional network interfaces - * are defined. These definitions server as placeholders for the event bits, - * however, in the code these events will allways be referred to as shifted - * offsets from the base NET events. - */ - -/* Events that a guest OS may receive from the hypervisor. */ -#define EVENT_BLKDEV 0x01 /* A block device response has been queued. */ -#define EVENT_TIMER 0x02 /* A timeout has been updated. */ -#define EVENT_DIE 0x04 /* OS is about to be killed. Clean up please! */ -#define EVENT_DEBUG 0x08 /* Request guest to dump debug info (gross!) */ -#define EVENT_NET 0x10 /* There are packets for transmission. 
*/ -#define EVENT_PS2 0x20 /* PS/2 keyboard or mouse event(s) */ -#define EVENT_STOP 0x40 /* Prepare for stopping and possible pickling */ -#define EVENT_EVTCHN 0x80 /* Event pending on an event channel */ -#define EVENT_VBD_UPD 0x100 /* Event to signal VBDs should be reprobed */ - -/* Bit offsets, as opposed to the above masks. */ -#define _EVENT_BLKDEV 0 -#define _EVENT_TIMER 1 -#define _EVENT_DIE 2 -#define _EVENT_DEBUG 3 -#define _EVENT_NET 4 -#define _EVENT_PS2 5 -#define _EVENT_STOP 6 -#define _EVENT_EVTCHN 7 -#define _EVENT_VBD_UPD 8 -#define _EVENT_CONSOLE 9 /* This is only for domain-0 initial console. */ - /* * Virtual addresses beyond this are not modifiable by guest OSes. The * machine->physical mapping table starts at this address, read-only. @@ -119,73 +48,8 @@ #define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) #endif - -/* - * MMU_XXX: specified in least 2 bits of 'ptr' field. These bits are masked - * off to get the real 'ptr' value. - * All requests specify relevent address in 'ptr'. This is either a - * machine/physical address (MA), or linear/virtual address (VA). - * Normal requests specify update value in 'value'. - * Extended requests specify command in least 8 bits of 'value'. These bits - * are masked off to get the real 'val' value. Except for MMUEXT_SET_LDT - * which shifts the least bits out. - */ -/* A normal page-table update request. */ -#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ -/* Update an entry in the machine->physical mapping table. */ -#define MMU_MACHPHYS_UPDATE 2 /* ptr = MA of frame to modify entry for */ -/* An extended command. 
*/ -#define MMU_EXTENDED_COMMAND 3 /* least 8 bits of val demux further */ -/* Extended commands: */ -#define MMUEXT_PIN_L1_TABLE 0 /* ptr = MA of frame to pin */ -#define MMUEXT_PIN_L2_TABLE 1 /* ptr = MA of frame to pin */ -#define MMUEXT_PIN_L3_TABLE 2 /* ptr = MA of frame to pin */ -#define MMUEXT_PIN_L4_TABLE 3 /* ptr = MA of frame to pin */ -#define MMUEXT_UNPIN_TABLE 4 /* ptr = MA of frame to unpin */ -#define MMUEXT_NEW_BASEPTR 5 /* ptr = MA of new pagetable base */ -#define MMUEXT_TLB_FLUSH 6 /* ptr = NULL */ -#define MMUEXT_INVLPG 7 /* ptr = NULL ; val = VA to invalidate */ -#define MMUEXT_SET_LDT 8 /* ptr = VA of table; val = # entries */ -/* NB. MMUEXT_SET_SUBJECTDOM must consist of *_L followed immediately by *_H */ -#define MMUEXT_SET_SUBJECTDOM_L 9 /* (ptr[31:15],val[31:15]) = dom[31:0] */ -#define MMUEXT_SET_SUBJECTDOM_H 10 /* (ptr[31:15],val[31:15]) = dom[63:32] */ -#define MMUEXT_CMD_MASK 255 -#define MMUEXT_CMD_SHIFT 8 - -/* These are passed as 'flags' to update_va_mapping. They can be ORed. */ -#define UVMF_FLUSH_TLB 1 /* Flush entire TLB. */ -#define UVMF_INVLPG 2 /* Flush the VA mapping being updated. */ - -/* - * Master "switch" for enabling/disabling event delivery. - */ -#define EVENTS_MASTER_ENABLE_MASK 0x80000000UL -#define EVENTS_MASTER_ENABLE_BIT 31 - - -/* - * SCHEDOP_* - Scheduler hypercall operations. - */ -#define SCHEDOP_yield 0 /* Give up the CPU voluntarily. */ -#define SCHEDOP_block 1 /* Block until an event is received. */ -#define SCHEDOP_exit 3 /* Exit and kill this domain. */ -#define SCHEDOP_stop 4 /* Stop executing this domain. */ - -/* - * Commands to HYPERVISOR_serial_io(). - */ -#define SERIALIO_write 0 -#define SERIALIO_read 1 - #ifndef __ASSEMBLY__ -typedef u64 domid_t; -/* DOMID_SELF is used in certain contexts to refer to oneself. 
*/ -#define DOMID_SELF (~1ULL) - -#include "network.h" -#include "block.h" - /* * Send an array of these to HYPERVISOR_set_trap_table() */ @@ -201,23 +65,6 @@ typedef struct trap_info_st unsigned long address; /* code address */ } trap_info_t; -/* - * Send an array of these to HYPERVISOR_mmu_update() - */ -typedef struct -{ - unsigned long ptr, val; /* *ptr = val */ -} mmu_update_t; - -/* - * Send an array of these to HYPERVISOR_multicall() - */ -typedef struct -{ - unsigned long op; - unsigned long args[7]; -} multicall_entry_t; - typedef struct { unsigned long ebx; @@ -240,113 +87,36 @@ typedef struct } execution_context_t; /* - * Xen/guestos shared data -- pointer provided in start_info. - * NB. We expect that this struct is smaller than a page. + * Shared info specific to the architecture in question */ -typedef struct shared_info_st { - - /* Bitmask of outstanding event notifications hypervisor -> guest OS. */ - unsigned long events; - /* - * Hypervisor will only signal event delivery via the "callback exception" - * when a pending event is not masked. The mask also contains a "master - * enable" which prevents any event delivery. This mask can be used to - * prevent unbounded reentrancy and stack overflow (in this way, acts as a - * kind of interrupt-enable flag). - */ - unsigned long events_mask; - - /* - * A domain can have up to 1024 bidirectional event channels to/from other - * domains. Domains must agree out-of-band to set up a connection, and then - * each must explicitly request a connection to the other. When both have - * made the request the channel is fully allocated and set up. - * - * An event channel is a single sticky 'bit' of information. Setting the - * sticky bit also causes an upcall into the target domain. In this way - * events can be seen as an IPI [Inter-Process(or) Interrupt]. - * - * A guest can see which of its event channels are pending by reading the - * 'event_channel_pend' bitfield. 
To avoid a linear scan of the entire - * bitfield there is a 'selector' which indicates which words in the - * bitfield contain at least one set bit. - * - * There is a similar bitfield to indicate which event channels have been - * disconnected by the remote end. There is also a 'selector' for this - * field. - */ - u32 event_channel_pend[32]; - u32 event_channel_pend_sel; - u32 event_channel_disc[32]; - u32 event_channel_disc_sel; - - /* - * Time: The following abstractions are exposed: System Time, Clock Time, - * Domain Virtual Time. Domains can access Cycle counter time directly. - */ - +typedef struct arch_shared_info_st { unsigned int rdtsc_bitshift; /* tsc_timestamp uses N:N+31 of TSC. */ - u64 cpu_freq; /* CPU frequency (Hz). */ - - /* - * The following values are updated periodically (and not necessarily - * atomically!). The guest OS detects this because 'time_version1' is - * incremented just before updating these values, and 'time_version2' is - * incremented immediately after. See Xenolinux code for an example of how - * to read these values safely (arch/xeno/kernel/time.c). - */ - unsigned long time_version1; /* A version number for info below. */ - unsigned long time_version2; /* A version number for info below. */ - unsigned long tsc_timestamp; /* TSC at last update of time vals. */ - u64 system_time; /* Time, in nanosecs, since boot. */ - unsigned long wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */ - unsigned long wc_usec; /* Usecs 00:00:00 UTC, Jan 1, 1970. */ - - /* Domain Virtual Time */ - u64 domain_time; - - /* - * Timeout values: - * Allow a domain to specify a timeout value in system time and - * domain virtual time. - */ - u64 wall_timeout; - u64 domain_timeout; - - /* - * The index structures are all stored here for convenience. The rings - * themselves are allocated by Xen but the guestos must create its own - * mapping -- the machine address is given in the startinfo structure to - * allow this to happen. 
- */ - net_idx_t net_idx[MAX_DOMAIN_VIFS]; +} arch_shared_info_t; - execution_context_t execution_context; - -} shared_info_t; /* - * NB. We expect that this struct is smaller than a page. + * The following is all CPU context. Note that the i387_ctxt block is filled + * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. */ -typedef struct start_info_st { - /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */ - unsigned long nr_pages; /* total pages allocated to this domain. */ - unsigned long shared_info; /* MACHINE address of shared info struct.*/ - unsigned long flags; /* SIF_xxx flags. */ - /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */ - unsigned long pt_base; /* VIRTUAL address of page directory. */ - unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */ - unsigned long mod_len; /* Size (bytes) of pre-loaded module. */ - unsigned char cmd_line[1]; /* Variable-length options. */ -} start_info_t; - -/* These flags are passed in the 'flags' field of start_info_t. */ -#define SIF_PRIVILEGED 1 /* Is the domain privileged? */ -#define SIF_INITDOMAIN 2 /* Is thsi the initial control domain? */ - -/* For use in guest OSes. 
*/ -extern shared_info_t *HYPERVISOR_shared_info; +typedef struct full_execution_context_st +{ +#define ECF_I387_VALID (1<<0) + unsigned long flags; + execution_context_t i386_ctxt; /* User-level CPU registers */ + char i387_ctxt[256]; /* User-level FPU registers */ + trap_info_t trap_ctxt[256]; /* Virtual IDT */ + unsigned int fast_trap_idx; /* "Fast trap" vector offset */ + unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ + unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ + unsigned long ring1_ss, ring1_esp; /* Virtual TSS (only SS1/ESP1) */ + unsigned long pt_base; /* CR3 (pagetable base) */ + unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */ + unsigned long event_callback_cs; /* CS:EIP of event callback */ + unsigned long event_callback_eip; + unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */ + unsigned long failsafe_callback_eip; +} full_execution_context_t; -#endif /* !__ASSEMBLY__ */ +#endif -#endif /* __HYPERVISOR_IF_H__ */ +#endif diff --git a/xen/include/hypervisor-ifs/if-x86_64/hypervisor-if-arch.h b/xen/include/hypervisor-ifs/if-x86_64/hypervisor-if-arch.h index ee950c01e1..7aa676d5d4 100644 --- a/xen/include/hypervisor-ifs/if-x86_64/hypervisor-if-arch.h +++ b/xen/include/hypervisor-ifs/if-x86_64/hypervisor-if-arch.h @@ -1,11 +1,11 @@ /****************************************************************************** * hypervisor-if.h * - * Interface to Xeno hypervisor. + * Interface to AMD x86-64 bit Xeno hypervisor. */ -#ifndef __HYPERVISOR_IF_H__ -#define __HYPERVISOR_IF_H__ +#ifndef __HYPERVISOR_IF_X86_64_H__ +#define __HYPERVISOR_IF_X86_64_H__ /* * SEGMENT DESCRIPTOR TABLES @@ -19,173 +19,33 @@ * NB. The reserved range is inclusive (that is, both FIRST_RESERVED_GDT_ENTRY * and LAST_RESERVED_GDT_ENTRY are reserved). 
*/ -#define NR_RESERVED_GDT_ENTRIES 40 +#define NR_RESERVED_GDT_ENTRIES 40 #define FIRST_RESERVED_GDT_ENTRY 256 #define LAST_RESERVED_GDT_ENTRY \ (FIRST_RESERVED_GDT_ENTRY + NR_RESERVED_GDT_ENTRIES - 1) /* + * 64-bit segment selectors * These flat segments are in the Xen-private section of every GDT. Since these * are also present in the initial GDT, many OSes will be able to avoid * installing their own GDT. */ -#define FLAT_RING1_CS 0x0819 /* GDT index 259 */ -#define FLAT_RING1_DS 0x0821 /* GDT index 260 */ -#define FLAT_RING3_CS 0x082b /* GDT index 261 */ -#define FLAT_RING3_DS 0x0833 /* GDT index 262 */ +#define FLAT_RING3_CS32 0x0823 /* GDT index 260 */ +#define FLAT_RING3_CS64 0x082b /* GDT index 261 */ +#define FLAT_RING3_DS 0x0833 /* GDT index 262 */ -/* - * HYPERVISOR "SYSTEM CALLS" - */ - -/* EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5. */ -#define __HYPERVISOR_set_trap_table 0 -#define __HYPERVISOR_mmu_update 1 -#define __HYPERVISOR_console_write 2 /* DEPRECATED */ -#define __HYPERVISOR_set_gdt 3 -#define __HYPERVISOR_stack_switch 4 -#define __HYPERVISOR_set_callbacks 5 -#define __HYPERVISOR_net_io_op 6 -#define __HYPERVISOR_fpu_taskswitch 7 -#define __HYPERVISOR_sched_op 8 -#define __HYPERVISOR_dom0_op 9 -#define __HYPERVISOR_network_op 10 -#define __HYPERVISOR_block_io_op 11 -#define __HYPERVISOR_set_debugreg 12 -#define __HYPERVISOR_get_debugreg 13 -#define __HYPERVISOR_update_descriptor 14 -#define __HYPERVISOR_set_fast_trap 15 -#define __HYPERVISOR_dom_mem_op 16 -#define __HYPERVISOR_multicall 17 -#define __HYPERVISOR_kbd_op 18 -#define __HYPERVISOR_update_va_mapping 19 -#define __HYPERVISOR_set_timer_op 20 -#define __HYPERVISOR_event_channel_op 21 -#define __HYPERVISOR_xen_version 22 -#define __HYPERVISOR_serial_io 23 /* And the trap vector is... 
*/ -#define TRAP_INSTR "int $0x82" - +#define TRAP_INSTR "syscall" -/* - * MULTICALLS - * - * Multicalls are listed in an array, with each element being a fixed size - * (BYTES_PER_MULTICALL_ENTRY). Each is of the form (op, arg1, ..., argN) - * where each element of the tuple is a machine word. - */ -#define BYTES_PER_MULTICALL_ENTRY 32 - -/* EVENT MESSAGES - * - * Here, as in the interrupts to the guestos, additional network interfaces - * are defined. These definitions server as placeholders for the event bits, - * however, in the code these events will allways be referred to as shifted - * offsets from the base NET events. - */ - -/* Events that a guest OS may receive from the hypervisor. */ -#define EVENT_BLKDEV 0x01 /* A block device response has been queued. */ -#define EVENT_TIMER 0x02 /* A timeout has been updated. */ -#define EVENT_DIE 0x04 /* OS is about to be killed. Clean up please! */ -#define EVENT_DEBUG 0x08 /* Request guest to dump debug info (gross!) */ -#define EVENT_NET 0x10 /* There are packets for transmission. */ -#define EVENT_PS2 0x20 /* PS/2 keyboard or mouse event(s) */ -#define EVENT_STOP 0x40 /* Prepare for stopping and possible pickling */ -#define EVENT_EVTCHN 0x80 /* Event pending on an event channel */ -#define EVENT_VBD_UPD 0x100 /* Event to signal VBDs should be reprobed */ - -/* Bit offsets, as opposed to the above masks. */ -#define _EVENT_BLKDEV 0 -#define _EVENT_TIMER 1 -#define _EVENT_DIE 2 -#define _EVENT_DEBUG 3 -#define _EVENT_NET 4 -#define _EVENT_PS2 5 -#define _EVENT_STOP 6 -#define _EVENT_EVTCHN 7 -#define _EVENT_VBD_UPD 8 -#define _EVENT_CONSOLE 9 /* This is only for domain-0 initial console. */ - -/* - * Virtual addresses beyond this are not modifiable by guest OSes. The - * machine->physical mapping table starts at this address, read-only. 
- */ -#define HYPERVISOR_VIRT_START (0xFC000000UL) #ifndef machine_to_phys_mapping #define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) #endif - -/* - * MMU_XXX: specified in least 2 bits of 'ptr' field. These bits are masked - * off to get the real 'ptr' value. - * All requests specify relevent address in 'ptr'. This is either a - * machine/physical address (MA), or linear/virtual address (VA). - * Normal requests specify update value in 'value'. - * Extended requests specify command in least 8 bits of 'value'. These bits - * are masked off to get the real 'val' value. Except for MMUEXT_SET_LDT - * which shifts the least bits out. - */ -/* A normal page-table update request. */ -#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ -/* Update an entry in the machine->physical mapping table. */ -#define MMU_MACHPHYS_UPDATE 2 /* ptr = MA of frame to modify entry for */ -/* An extended command. */ -#define MMU_EXTENDED_COMMAND 3 /* least 8 bits of val demux further */ -/* Extended commands: */ -#define MMUEXT_PIN_L1_TABLE 0 /* ptr = MA of frame to pin */ -#define MMUEXT_PIN_L2_TABLE 1 /* ptr = MA of frame to pin */ -#define MMUEXT_PIN_L3_TABLE 2 /* ptr = MA of frame to pin */ -#define MMUEXT_PIN_L4_TABLE 3 /* ptr = MA of frame to pin */ -#define MMUEXT_UNPIN_TABLE 4 /* ptr = MA of frame to unpin */ -#define MMUEXT_NEW_BASEPTR 5 /* ptr = MA of new pagetable base */ -#define MMUEXT_TLB_FLUSH 6 /* ptr = NULL */ -#define MMUEXT_INVLPG 7 /* ptr = NULL ; val = VA to invalidate */ -#define MMUEXT_SET_LDT 8 /* ptr = VA of table; val = # entries */ -/* NB. MMUEXT_SET_SUBJECTDOM must consist of *_L followed immediately by *_H */ -#define MMUEXT_SET_SUBJECTDOM_L 9 /* (ptr[31:15],val[31:15]) = dom[31:0] */ -#define MMUEXT_SET_SUBJECTDOM_H 10 /* (ptr[31:15],val[31:15]) = dom[63:32] */ -#define MMUEXT_CMD_MASK 255 -#define MMUEXT_CMD_SHIFT 8 - -/* These are passed as 'flags' to update_va_mapping. They can be ORed. 
*/ -#define UVMF_FLUSH_TLB 1 /* Flush entire TLB. */ -#define UVMF_INVLPG 2 /* Flush the VA mapping being updated. */ - -/* - * Master "switch" for enabling/disabling event delivery. - */ -#define EVENTS_MASTER_ENABLE_MASK 0x80000000UL -#define EVENTS_MASTER_ENABLE_BIT 31 - - -/* - * SCHEDOP_* - Scheduler hypercall operations. - */ -#define SCHEDOP_yield 0 /* Give up the CPU voluntarily. */ -#define SCHEDOP_block 1 /* Block until an event is received. */ -#define SCHEDOP_exit 3 /* Exit and kill this domain. */ -#define SCHEDOP_stop 4 /* Stop executing this domain. */ - -/* - * Commands to HYPERVISOR_serial_io(). - */ -#define SERIALIO_write 0 -#define SERIALIO_read 1 - #ifndef __ASSEMBLY__ -typedef u64 domid_t; -/* DOMID_SELF is used in certain contexts to refer to oneself. */ -#define DOMID_SELF (~1ULL) - -#include "network.h" -#include "block.h" - /* * Send an array of these to HYPERVISOR_set_trap_table() */ @@ -201,41 +61,27 @@ typedef struct trap_info_st unsigned long address; /* code address */ } trap_info_t; -/* - * Send an array of these to HYPERVISOR_mmu_update() - */ -typedef struct -{ - unsigned long ptr, val; /* *ptr = val */ -} mmu_update_t; - -/* - * Send an array of these to HYPERVISOR_multicall() - */ -typedef struct -{ - unsigned long op; - unsigned long args[7]; -} multicall_entry_t; - typedef struct { - unsigned long ebx; - unsigned long ecx; - unsigned long edx; - unsigned long esi; - unsigned long edi; - unsigned long ebp; - unsigned long eax; - unsigned long ds; - unsigned long es; - unsigned long fs; - unsigned long gs; - unsigned long _unused; - unsigned long eip; + unsigned long r15; + unsigned long r14; + unsigned long r13; + unsigned long r12; + unsigned long rbp; + unsigned long rbx; + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long rax; + unsigned long rcx; + unsigned long rdx; + unsigned long rsi; + unsigned long rdi; + unsigned long rip; unsigned long cs; unsigned long eflags; - unsigned 
long esp; + unsigned long rsp; unsigned long ss; } execution_context_t; @@ -243,109 +89,33 @@ typedef struct * Xen/guestos shared data -- pointer provided in start_info. * NB. We expect that this struct is smaller than a page. */ -typedef struct shared_info_st { - - /* Bitmask of outstanding event notifications hypervisor -> guest OS. */ - unsigned long events; - /* - * Hypervisor will only signal event delivery via the "callback exception" - * when a pending event is not masked. The mask also contains a "master - * enable" which prevents any event delivery. This mask can be used to - * prevent unbounded reentrancy and stack overflow (in this way, acts as a - * kind of interrupt-enable flag). - */ - unsigned long events_mask; - - /* - * A domain can have up to 1024 bidirectional event channels to/from other - * domains. Domains must agree out-of-band to set up a connection, and then - * each must explicitly request a connection to the other. When both have - * made the request the channel is fully allocated and set up. - * - * An event channel is a single sticky 'bit' of information. Setting the - * sticky bit also causes an upcall into the target domain. In this way - * events can be seen as an IPI [Inter-Process(or) Interrupt]. - * - * A guest can see which of its event channels are pending by reading the - * 'event_channel_pend' bitfield. To avoid a linear scan of the entire - * bitfield there is a 'selector' which indicates which words in the - * bitfield contain at least one set bit. - * - * There is a similar bitfield to indicate which event channels have been - * disconnected by the remote end. There is also a 'selector' for this - * field. - */ - u32 event_channel_pend[32]; - u32 event_channel_pend_sel; - u32 event_channel_disc[32]; - u32 event_channel_disc_sel; - - /* - * Time: The following abstractions are exposed: System Time, Clock Time, - * Domain Virtual Time. Domains can access Cycle counter time directly. 
- */ - +typedef struct arch_shared_info_st { unsigned int rdtsc_bitshift; /* tsc_timestamp uses N:N+31 of TSC. */ - u64 cpu_freq; /* CPU frequency (Hz). */ - - /* - * The following values are updated periodically (and not necessarily - * atomically!). The guest OS detects this because 'time_version1' is - * incremented just before updating these values, and 'time_version2' is - * incremented immediately after. See Xenolinux code for an example of how - * to read these values safely (arch/xeno/kernel/time.c). - */ - unsigned long time_version1; /* A version number for info below. */ - unsigned long time_version2; /* A version number for info below. */ - unsigned long tsc_timestamp; /* TSC at last update of time vals. */ - u64 system_time; /* Time, in nanosecs, since boot. */ - unsigned long wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */ - unsigned long wc_usec; /* Usecs 00:00:00 UTC, Jan 1, 1970. */ - - /* Domain Virtual Time */ - u64 domain_time; - - /* - * Timeout values: - * Allow a domain to specify a timeout value in system time and - * domain virtual time. - */ - u64 wall_timeout; - u64 domain_timeout; +} arch_shared_info_t; - /* - * The index structures are all stored here for convenience. The rings - * themselves are allocated by Xen but the guestos must create its own - * mapping -- the machine address is given in the startinfo structure to - * allow this to happen. - */ - net_idx_t net_idx[MAX_DOMAIN_VIFS]; - - execution_context_t execution_context; - -} shared_info_t; /* - * NB. We expect that this struct is smaller than a page. + * The following is all CPU context. Note that the i387_ctxt block is filled + * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. */ -typedef struct start_info_st { - /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */ - unsigned long nr_pages; /* total pages allocated to this domain. 
*/ - unsigned long shared_info; /* MACHINE address of shared info struct.*/ - unsigned long flags; /* SIF_xxx flags. */ - /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */ - unsigned long pt_base; /* VIRTUAL address of page directory. */ - unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */ - unsigned long mod_len; /* Size (bytes) of pre-loaded module. */ - unsigned char cmd_line[1]; /* Variable-length options. */ -} start_info_t; - -/* These flags are passed in the 'flags' field of start_info_t. */ -#define SIF_PRIVILEGED 1 /* Is the domain privileged? */ -#define SIF_INITDOMAIN 2 /* Is thsi the initial control domain? */ - -/* For use in guest OSes. */ -extern shared_info_t *HYPERVISOR_shared_info; +typedef struct full_execution_context_st +{ +#define ECF_I387_VALID (1<<0) + unsigned long flags; + execution_context_t x86_64_ctxt; /* User-level CPU registers */ + char i387_ctxt[512]; /* User-level FPU registers */ + trap_info_t trap_ctxt[256]; /* Virtual IDT */ + unsigned int fast_trap_idx; /* "Fast trap" vector offset */ + unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ + unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ + unsigned long ring1_ss, ring1_esp; /* Virtual TSS (only SS1/ESP1) */ + unsigned long pt_base; /* CR3 (pagetable base) */ + unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */ + unsigned long event_callback_cs; /* CS:EIP of event callback */ + unsigned long event_callback_eip; + unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */ + unsigned long failsafe_callback_eip; +} full_execution_context_t; #endif /* !__ASSEMBLY__ */ diff --git a/xen/include/xeno/sched.h b/xen/include/xeno/sched.h index 475b0924ed..2e17b6b526 100644 --- a/xen/include/xeno/sched.h +++ b/xen/include/xeno/sched.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -297,26 +298,4 @@ extern struct task_struct *task_list; extern void 
update_process_times(int user); -#include -static inline void load_LDT(struct task_struct *p) -{ - unsigned int cpu; - struct desc_struct *desc; - unsigned long ents; - - if ( (ents = p->mm.ldt_ents) == 0 ) - { - __asm__ __volatile__ ( "lldt %%ax" : : "a" (0) ); - } - else - { - cpu = smp_processor_id(); - desc = (struct desc_struct *)GET_GDT_ADDRESS(p) + __LDT(cpu); - desc->a = ((LDT_VIRT_START&0xffff)<<16) | (ents*8-1); - desc->b = (LDT_VIRT_START&(0xff<<24)) | 0x8200 | - ((LDT_VIRT_START&0xff0000)>>16); - __asm__ __volatile__ ( "lldt %%ax" : : "a" (__LDT(cpu)<<3) ); - } -} - #endif -- 2.30.2